root/tools/firmware_load_ng.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. addBufRange
  2. findRanges
  3. getBufRangeForIndex
  4. find_Nth_str
  5. find_str
  6. find_next_bytes_range
  7. find_bytes_all
  8. find_next_substr_bytes
  9. find_next_str_bytes_range
  10. find_str_bytes_main_fw
  11. find_next_str_bytes
  12. find_str_bytes
  13. isASCIIstring
  14. adr_get_range
  15. adr_get_range_type
  16. ptr2adr
  17. adr2ptr
  18. adr2ptr_with_data
  19. adr_range_type_str
  20. adr_range_desc_str
  21. adr_is_var
  22. adr_is_main_fw_code
  23. find_u32_adr_range
  24. find_u32_adr
  25. fw_u32
  26. fw_memcmp
  27. adr_hist_reset
  28. adr_hist_index
  29. adr_hist_add
  30. adr_hist_get
  31. isARM
  32. isLDR_PC
  33. isLDR_PC_PC
  34. isSUBW_PC
  35. isADDW_PC
  36. isADD_PC
  37. isSUB_PC
  38. isRETx
  39. isPUSH_LR
  40. isPOP_LR
  41. isPOP_PC
  42. isADDx_imm
  43. isSUBx_imm
  44. isADRx
  45. LDR_PC2valptr_thumb
  46. LDR_PC2valptr_arm
  47. LDR_PC2valptr
  48. LDR_PC2adr
  49. ADRx2adr
  50. ADR2adr
  51. ADR2valptr
  52. LDR_PC2val
  53. LDR_PC_PC_target
  54. B_target
  55. CBx_target
  56. BLXimm_target
  57. BL_target
  58. B_BL_target
  59. B_BL_BLXimm_target
  60. get_TBx_PC_info
  61. disasm_iter_new
  62. disasm_iter_free
  63. disasm_iter_set
  64. disasm_iter_init
  65. disasm_iter
  66. disasm_iter_redo
  67. fw_disasm_iter_start
  68. fw_disasm_iter
  69. fw_disasm_iter_single
  70. fw_disasm_adr
  71. fw_search_insn
  72. search_disasm_const_ref
  73. search_disasm_str_ref
  74. search_disasm_calls
  75. search_calls_multi_end
  76. search_disasm_calls_multi
  77. search_disasm_calls_veneer_multi
  78. get_call_const_args
  79. get_direct_jump_target
  80. get_branch_call_insn_target
  81. find_and_get_var_ldr
  82. check_simple_func
  83. find_last_call_from_func
  84. insn_match_seq
  85. insn_match
  86. insn_match_any
  87. insn_match_find_next
  88. insn_match_find_nth
  89. insn_match_find_next_seq
  90. fw_search_bytes
  91. fw_add_adr_range
  92. find_dryos_vers
  93. firmware_load
  94. do_blx_check
  95. firmware_init_capstone
  96. find_startup_copy
  97. find_exception_vec
  98. firmware_init_data_ranges
  99. firmware_unload

   1 #include <inttypes.h>
   2 #include <stdio.h>
   3 #include <stdint.h>
   4 #include <string.h>
   5 
   6 #include <capstone.h>
   7 
   8 #include "stubs_load.h" // needed for sv in fw struct
   9 #include "firmware_load_ng.h"
  10 
  11 
  12 // Add a valid range to the list
  13 static void addBufRange(firmware *fw, int o, int l)
  14 {
  15     BufRange *n = malloc(sizeof(BufRange));
  16     n->p = fw->buf32 + o;
  17     n->off = o;
  18     n->len = l;
  19     n->next = 0;
  20     if (fw->br == 0)
  21     {
  22         fw->br = n;
  23     }
  24     else
  25     {
  26         fw->last->next = n;
  27     }
  28     fw->last = n;
  29 }
  30 
  31 // Find valid ranges for the firmware dump
  32 static void findRanges(firmware *fw)
  33 {
  34     int i, j, k;
  35 
  36     // Find all the valid ranges for checking (skips over large blocks of 0xFFFFFFFF)
  37     fw->br = 0; fw->last = 0;
  38     k = -1; j = 0;
  39     for (i = 0; i < fw->size32; i++)
  40     {
  41         if (fw->buf32[i] == 0xFFFFFFFF)   // Possible start of block to skip
  42         {
  43             if (k == -1)            // Mark start of possible skip block
  44             {
  45                 k = i;
  46             }
  47         }
  48         else                        // Found end of block ?
  49         {
  50             if (k != -1)
  51             {
  52                 if (i - k > 32)     // If block more than 32 words then we want to skip it
  53                 {
  54                     if (k - j > 8)
  55                     {
  56                         // Add a range record for the previous valid range (ignore short ranges)
  57                         addBufRange(fw,j,k - j);
  58                     }
  59                     j = i;          // Reset valid range start to current position
  60                 }
  61                 k = -1;             // Reset marker for skip block
  62             }
  63         }
  64     }
  65     // Add range for last valid block
  66     if (k != -1)
  67     {
  68         if (k - j > 8)
  69         {
  70             addBufRange(fw,j,k - j);
  71         }
  72     }
  73     else
  74     {
  75         if (i - j > 8)
  76         {
  77             addBufRange(fw,j,i - j);
  78         }
  79     }
  80 }
  81 
  82 // return the buffrange for a given offset or null if not found
  83 BufRange *getBufRangeForIndex(firmware *fw,int i)
  84 {
  85     BufRange *br = fw->br;
  86     while (br) {
  87         if(i >= br->off && i < br->off + br->len) {
  88             return br;
  89         }
  90         br = br->next;
  91     }
  92     return NULL;
  93 }
  94 
  95 // Find the index of a string in the firmware
  96 // Assumes the string starts on a 32bit boundary.
  97 // String + terminating zero byte should be at least 4 bytes long
  98 // Handles multiple string instances
  99 int find_Nth_str(firmware *fw, char *str, int N)
 100 {
 101     int nlen = strlen(str);
 102     uint32_t nm0 = *((uint32_t*)str);
 103     uint32_t *p;
 104     int j;
 105 
 106     BufRange *br = fw->br;
 107     while (br)
 108     {
 109         for (p = br->p, j = 0; j < br->len - nlen/4; j++, p++)
 110         {
 111             if ((nm0 == *p) && ((nlen<=4) || (memcmp(p+1,str+4,nlen-4) == 0)) )
 112             {
 113                 if (--N == 0)
 114                     return j+br->off;
 115             }
 116         }
 117         br = br->next;
 118     }
 119 
 120     return -1;
 121 }
 122 
 123 int find_str(firmware *fw, char *str)
 124 {
 125     return find_Nth_str(fw, str, 1);
 126 }
 127 
 128 // find sequence of bytes, starting from star_adr, up to max_adr, any alignment
 129 // returns firmware address or 0
 130 // use repeated calls to find multiple
 131 // NOTE only handles ROM addresses
 132 uint32_t find_next_bytes_range(firmware *fw, const void *bytes, size_t len, uint32_t start_adr, uint32_t max_adr)
 133 {
 134     if(!start_adr) {
 135         start_adr = fw->base;
 136     }
 137     if(start_adr < fw->base || start_adr >= fw->base + fw->size8) {
 138         fprintf(stderr,"find_next_bytes_range invalid start_adr 0x%08x\n",start_adr);
 139         return 0;
 140     }
 141     if(!max_adr) {
 142         max_adr = fw->base + fw->size8-1;
 143     }
 144     if(max_adr < fw->base || max_adr >= fw->base + fw->size8) {
 145         fprintf(stderr,"find_next_bytes_range invalid max_adr 0x%08x\n",max_adr);
 146         return 0;
 147     }
 148     int end_k = (max_adr - fw->base);
 149     BufRange *p = getBufRangeForIndex(fw,(start_adr - fw->base)/4);
 150     if(!p) {
 151         return 0;
 152     }
 153     int k = start_adr - fw->base;
 154 
 155     while (k < end_k)
 156     {
 157         for (; k < (p->off + p->len)*4; k++)
 158         {
 159             if (memcmp(fw->buf8+k,bytes,len) == 0) {
 160                 return fw->base+k;
 161             }
 162         }
 163         p = p->next;
 164         if(!p) {
 165             break;
 166         }
 167         k = p->off*4;
 168     }
 169     return 0;
 170 }
 171 
 172 // find up to max matching byte sequences, storing addresses in result
 173 // returns count
 174 int find_bytes_all(firmware *fw, const void *bytes, size_t len, uint32_t adr, uint32_t *result, int max)
 175 {
 176     int i;
 177     for(i=0,adr=find_next_bytes_range(fw,bytes,len,0,0); adr && (i < max); adr=find_next_bytes_range(fw,bytes,len,adr+len,0),i++) {
 178         result[i] = adr;
 179     }
 180     return i;
 181 }
 182 
 183 uint32_t find_next_substr_bytes(firmware *fw, const char *str, uint32_t adr)
 184 {
 185     //fprintf(stderr,"find_next_substr_bytes 0x%08x\n",adr);
 186     // strlen excludes null
 187     return find_next_bytes_range(fw,str,strlen(str),adr,0);
 188 }
 189 
 190 uint32_t find_next_str_bytes_range(firmware *fw, const char *str, uint32_t adr,uint32_t max_adr)
 191 {
 192     // +1 to include the null in memcmp
 193     return find_next_bytes_range(fw,str,strlen(str)+1,adr,max_adr);
 194 }
 195 
 196 // find a string within range of LDR pc or ADR, starting from main fw
 197 uint32_t find_str_bytes_main_fw(firmware *fw, const char *str)
 198 {
 199     // max is end of fw code + 4096, assuming it fits in fw
 200     // while early code could technically load from base - 1k, unlikely
 201     uint32_t max_adr;
 202     if(fw->base + fw->size8 - 4096 > fw->rom_code_search_max_adr) {
 203         max_adr = fw->rom_code_search_max_adr + 4096;
 204     } else {
 205         max_adr = fw->base + fw->size8;
 206     }
 207     // +1 to include the null in memcmp
 208     return find_next_bytes_range(fw,str,strlen(str)+1,fw->rom_code_search_min_adr,max_adr);
 209 }
 210 
 211 uint32_t find_next_str_bytes(firmware *fw, const char *str, uint32_t adr)
 212 {
 213     // +1 to include the null in memcmp
 214     return find_next_bytes_range(fw,str,strlen(str)+1,adr,0);
 215 }
 216 
 217 // Find the index of a string in the firmware, can start at any address
 218 // returns firmware address
 219 uint32_t find_str_bytes(firmware *fw, const char *str)
 220 {
 221     return find_next_str_bytes(fw,str,fw->base);
 222 }
 223 
 224 int isASCIIstring(firmware *fw, uint32_t adr)
 225 {
 226     unsigned char *p = (unsigned char*)adr2ptr_with_data(fw, adr);
 227     if(!p) {
 228         return 0;
 229     }
 230     // TODO should avoid running off end of dump
 231     int i;
 232     for (i = 0; (i < 100) && (p[i] != 0); i++)
 233     {
 234         if (!((p[i] == '\r') || (p[i] == '\n') || (p[i] == '\t') || ((p[i] >= 0x20) && (p[i] <= 0x7f))))
 235         {
 236             return 0;
 237         }
 238     }
 239     if ((i >= 2) && (p[i] == 0))
 240         return 1;
 241     return 0;
 242 }
 243 
 244 // return address range struct for adr, or NULL if not in known range
 245 adr_range_t *adr_get_range(firmware *fw, uint32_t adr)
 246 {
 247     int i;
 248     adr_range_t *r=fw->adr_ranges;
 249     for(i=0;i<fw->adr_range_count;i++) {
 250         if(adr >= r->start && adr < r->start + r->bytes) {
 251             return r;
 252         }
 253         r++;
 254     }
 255     return NULL;
 256 }
 257 
 258 // return what kind of range adr is in
 259 int adr_get_range_type(firmware *fw, uint32_t adr)
 260 {
 261     adr_range_t *r=adr_get_range(fw,adr);
 262     if(!r) {
 263         return ADR_RANGE_INVALID;
 264     }
 265     return r->type;
 266 }
 267 
 268 uint32_t ptr2adr(firmware *fw, uint8_t *ptr)
 269 {
 270     // TODO handle copied, or maybe another func to convert?
 271     return (ptr-fw->buf8)+fw->base;
 272 }
 273 
 274 uint8_t* adr2ptr(firmware *fw, uint32_t adr)
 275 {
 276     adr_range_t *r=adr_get_range(fw,adr);
 277     if(!r) {
 278         return NULL;
 279     }
 280     switch(r->type) {
 281         case ADR_RANGE_RAM_CODE:
 282         case ADR_RANGE_ROM:
 283             return (r->buf)+(adr - r->start);
 284         default:
 285             return NULL;
 286     }
 287 }
 288 
 289 uint8_t* adr2ptr_with_data(firmware *fw, uint32_t adr)
 290 {
 291     adr_range_t *r=adr_get_range(fw,adr);
 292     if(!r) {
 293         return NULL;
 294     }
 295     switch(r->type) {
 296         case ADR_RANGE_RAM_CODE:
 297         case ADR_RANGE_INIT_DATA:
 298         case ADR_RANGE_ROM:
 299             return (r->buf)+(adr - r->start);
 300         default:
 301             return NULL;
 302     }
 303 }
 304 
 305 // return constant string describing type
 306 const char* adr_range_type_str(int type)
 307 {
 308     switch(type) {
 309         case ADR_RANGE_INVALID:
 310             return "(invalid)";
 311         case ADR_RANGE_ROM:
 312             return "ROM";
 313         case ADR_RANGE_RAM_CODE:
 314             return "RAM code";
 315         case ADR_RANGE_INIT_DATA:
 316             return "RAM data";
 317         default:
 318             return "(unknown)";
 319     }
 320 }
 321 
 322 // return constant string describing type and flags
 323 const char* adr_range_desc_str(adr_range_t *r)
 324 {
 325     switch(r->type) {
 326         case ADR_RANGE_INVALID:
 327             return "(invalid)";
 328         case ADR_RANGE_ROM:
 329             return "ROM";
 330         case ADR_RANGE_RAM_CODE:
 331             if(r->flags & ADR_RANGE_FL_EVEC) {
 332                 return "EVEC";
 333             } else if(r->flags & ADR_RANGE_FL_TCM) {
 334                 return "TCM code";
 335             }
 336             return "RAM code";
 337         case ADR_RANGE_INIT_DATA:
 338             return "RAM data";
 339         default:
 340             return "(unknown)";
 341     }
 342 }
 343 
 344 // return true if adr is in firmware DATA or BSS
 345 int adr_is_var(firmware *fw, uint32_t adr)
 346 {
 347     return (adr > fw->data_start && adr < fw->memisostart);
 348 }
 349 
 350 // return true if adr is in the ROM search range, or one of the copied RAM code regions
 351 int adr_is_main_fw_code(firmware *fw, uint32_t adr)
 352 {
 353     int adr_type = adr_get_range_type(fw,adr);
 354     if(adr_type == ADR_RANGE_RAM_CODE) {
 355         return 1;
 356     }
 357     if(adr_type != ADR_RANGE_ROM) {
 358         return 0;
 359     }
 360     if(adr < fw->rom_code_search_min_adr  || adr > fw->rom_code_search_max_adr) {
 361         return 0;
 362     }
 363     return 1;
 364 }
 365 
 366 /*
 367 return firmware address of 32 bit value, starting at address "start", up to max
 368 */
 369 uint32_t find_u32_adr_range(firmware *fw, uint32_t val, uint32_t start,uint32_t maxadr)
 370 {
 371     // TODO
 372     if(start == 0) {
 373         start=fw->base;
 374     }
 375     if(start & 3) {
 376         fprintf(stderr,"find_u32_adr unaligned start 0x%08x\n",start);
 377         return 0;
 378     }
 379     uint32_t *p=(uint32_t *)adr2ptr(fw,start);
 380     if(!p) {
 381         fprintf(stderr,"find_u32_adr bad start 0x%08x\n",start);
 382         return 0;
 383     }
 384     uint32_t *p_end;
 385     if(maxadr) {
 386         p_end = (uint32_t *)adr2ptr(fw,maxadr);
 387     } else {
 388         p_end = fw->buf32 + fw->size32 - 1;
 389     }
 390     // TODO should use buf ranges
 391     while(p<=p_end) {
 392         if(*p==val) {
 393             return ptr2adr(fw,(uint8_t *)p);
 394         }
 395         p++;
 396     }
 397     return 0;
 398 }
 399 
 400 // as above, full to end of fw
 401 uint32_t find_u32_adr(firmware *fw, uint32_t val, uint32_t start)
 402 {
 403     return find_u32_adr_range(fw,val,start, fw->base + (fw->size8 -4));
 404 }
 405 
 406 // return u32 value at adr
 407 uint32_t fw_u32(firmware *fw, uint32_t adr)
 408 {
 409     uint32_t *p=(uint32_t *)adr2ptr(fw,adr);
 410     if(!p) {
 411         fprintf(stderr,"fw_u32 bad adr 0x%08x\n",adr);
 412         return 0;
 413     }
 414     return *p;
 415 }
 416 
 417 // memcmp, but using a firmware address, returning 1 adr/size out of range
 418 int fw_memcmp(firmware *fw, uint32_t adr,const void *cmp, size_t n)
 419 {
 420     uint32_t *p=(uint32_t *)adr2ptr(fw,adr);
 421     if(!p) {
 422         return 1;
 423     }
 424     if(n >= fw->size8 - (adr - fw->base)) {
 425         return 1;
 426     }
 427     return memcmp(p,cmp,n);
 428 }
 429 
 430 
 431 // ****** address history functions ******
 432 // reset address history to empty
 433 void adr_hist_reset(adr_hist_t *ah)
 434 {
 435     ah->cur=0;
 436     ah->count=0;
 437     // memset shouldn't be needed
 438     // memset(ah->adrs,0,ADR_HIST_SIZE*4);
 439 }
 440 
 441 // return the index of current entry + i. may be negative or positive, wraps. Does not check validity
 442 int adr_hist_index(adr_hist_t *ah, int i)
 443 {
 444     int r=(ah->cur+i)%ADR_HIST_SIZE;
 445     if(r < 0) {
 446         return ADR_HIST_SIZE + r;
 447     }
 448     return r;
 449 }
 450 
 451 // add an entry to address history
 452 void adr_hist_add(adr_hist_t *ah, uint32_t adr)
 453 {
 454     ah->cur=adr_hist_index(ah,1);
 455     ah->adrs[ah->cur]=adr;
 456     if(ah->count < ADR_HIST_SIZE)  {
 457         ah->count++;
 458     }
 459 }
 460 
 461 // return the i'th previous entry in this history, or 0 if not valid (maybe should be -1?)
 462 // i= 0 = most recently disassembled instruction, if any
 463 uint32_t adr_hist_get(adr_hist_t *ah, int i)
 464 {
 465     if(!ah->count || i > ah->count) {
 466         return 0;
 467     }
 468     return ah->adrs[adr_hist_index(ah,-i)];
 469 }
 470 
 471 // ****** instruction analysis utilities ******
 472 // is insn an ARM instruction?
 473 // like cs_insn_group(cs_handle,insn,ARM_GRP_ARM) but doesn't require handle and doesn't check or report errors
 474 int isARM(cs_insn *insn)
 475 {
 476     int i;
 477     for(i=0;i<insn->detail->groups_count;i++) {
 478         if(insn->detail->groups[i] == ARM_GRP_ARM) {
 479             return 1;
 480         }
 481     }
 482     return 0;
 483 }
 484 
 485 /*
 486 is insn a PC relative load?
 487 */
 488 int isLDR_PC(cs_insn *insn)
 489 {
 490     return insn->id == ARM_INS_LDR
 491            && insn->detail->arm.op_count == 2
 492            && insn->detail->arm.operands[0].type == ARM_OP_REG
 493            && insn->detail->arm.operands[1].type == ARM_OP_MEM
 494            && insn->detail->arm.operands[1].mem.base == ARM_REG_PC;
 495 
 496 }
 497 
 498 /*
 499 is insn a PC relative load to PC?
 500 */
 501 int isLDR_PC_PC(cs_insn *insn)
 502 {
 503     if(!isLDR_PC(insn)) {
 504         return 0;
 505     }
 506     return (insn->detail->arm.operands[0].reg == ARM_REG_PC);
 507 }
 508 
 509 //  subw    rd, pc, #x?
 510 int isSUBW_PC(cs_insn *insn)
 511 {
 512     return(insn->id == ARM_INS_SUBW
 513        && insn->detail->arm.op_count == 3
 514        && insn->detail->arm.operands[0].type == ARM_OP_REG
 515        && insn->detail->arm.operands[0].reg != ARM_REG_PC
 516        && insn->detail->arm.operands[1].type == ARM_OP_REG
 517        && insn->detail->arm.operands[1].reg == ARM_REG_PC
 518        && insn->detail->arm.operands[2].type == ARM_OP_IMM);
 519 }
 520 
 521 //  addw    rd, pc, #x?
 522 int isADDW_PC(cs_insn *insn)
 523 {
 524     return(insn->id == ARM_INS_ADDW
 525        && insn->detail->arm.op_count == 3
 526        && insn->detail->arm.operands[0].type == ARM_OP_REG
 527        && insn->detail->arm.operands[0].reg != ARM_REG_PC
 528        && insn->detail->arm.operands[1].type == ARM_OP_REG
 529        && insn->detail->arm.operands[1].reg == ARM_REG_PC
 530        && insn->detail->arm.operands[2].type == ARM_OP_IMM);
 531 }
 532 
 533 // is insn ADD rd, pc, #x  (only generated for ARM in capstone)
 534 int isADD_PC(cs_insn *insn)
 535 {
 536     return (insn->id == ARM_INS_ADD
 537             && insn->detail->arm.op_count == 3
 538             && insn->detail->arm.operands[0].reg != ARM_REG_PC
 539             && insn->detail->arm.operands[1].type == ARM_OP_REG
 540             && insn->detail->arm.operands[1].reg == ARM_REG_PC
 541             && insn->detail->arm.operands[2].type == ARM_OP_IMM);
 542 }
 543 
 544 // is insn SUB rd, pc, #x  (only generated for ARM in capstone)
 545 int isSUB_PC(cs_insn *insn)
 546 {
 547     return (insn->id == ARM_INS_SUB
 548             && insn->detail->arm.op_count == 3
 549             && insn->detail->arm.operands[0].reg != ARM_REG_PC
 550             && insn->detail->arm.operands[1].type == ARM_OP_REG
 551             && insn->detail->arm.operands[1].reg == ARM_REG_PC
 552             && insn->detail->arm.operands[2].type == ARM_OP_IMM);
 553 }
 554 
 555 // does insn look like a function return?
 556 int isRETx(cs_insn *insn)
 557 {
 558     // BX LR
 559     if(insn->id == ARM_INS_BX
 560             && insn->detail->arm.op_count == 1
 561             && insn->detail->arm.operands[0].type == ARM_OP_REG
 562             && insn->detail->arm.operands[0].reg == ARM_REG_LR) {
 563         return 1;
 564     }
 565 
 566     // TODO LDR pc, [sp], imm is somewhat common, but could also be function pointer call
 567 
 568     // POP. capstone translates LDMFD   SP!,... in arm code to pop
 569     if(insn->id == ARM_INS_POP) {
 570         int i;
 571         for(i=0; i < insn->detail->arm.op_count; i++) {
 572             if(insn->detail->arm.operands[i].type == ARM_OP_REG
 573                 && insn->detail->arm.operands[i].reg == ARM_REG_PC) {
 574                 return 1;
 575             }
 576         }
 577     }
 578     // MOV PC, LR (some tools translate this to RET)
 579     if(insn->id == ARM_INS_MOV
 580             && insn->detail->arm.operands[0].type == ARM_OP_REG
 581             && insn->detail->arm.operands[0].reg == ARM_REG_PC
 582             && insn->detail->arm.operands[1].type == ARM_OP_REG
 583             && insn->detail->arm.operands[1].reg == ARM_REG_LR) {
 584         return 1;
 585     }
 586     return 0;
 587 }
 588 
 589 // does insn push LR (function start -ish)
 590 int isPUSH_LR(cs_insn *insn)
 591 {
 592     if(insn->id != ARM_INS_PUSH) {
 593         return 0;
 594     }
 595     int i;
 596     for(i=0; i < insn->detail->arm.op_count; i++) {
 597         if(insn->detail->arm.operands[i].type == ARM_OP_REG
 598             && insn->detail->arm.operands[i].reg == ARM_REG_LR) {
 599             return 1;
 600         }
 601     }
 602     return 0;
 603 }
 604 
 605 // does insn pop LR (func end before tail call)
 606 int isPOP_LR(cs_insn *insn)
 607 {
 608     if(insn->id != ARM_INS_POP) {
 609         return 0;
 610     }
 611     int i;
 612     for(i=0; i < insn->detail->arm.op_count; i++) {
 613         if(insn->detail->arm.operands[i].type == ARM_OP_REG
 614             && insn->detail->arm.operands[i].reg == ARM_REG_LR) {
 615             return 1;
 616         }
 617     }
 618     return 0;
 619 }
 620 
 621 // does insn pop PC
 622 int isPOP_PC(cs_insn *insn)
 623 {
 624     if(insn->id != ARM_INS_POP) {
 625         return 0;
 626     }
 627     int i;
 628     for(i=0; i < insn->detail->arm.op_count; i++) {
 629         if(insn->detail->arm.operands[i].type == ARM_OP_REG
 630             && insn->detail->arm.operands[i].reg == ARM_REG_PC) {
 631             return 1;
 632         }
 633     }
 634     return 0;
 635 }
 636 
 637 // is the instruction ADD* rx, imm
 638 int isADDx_imm(cs_insn *insn)
 639 {
 640     return ((insn->id == ARM_INS_ADD || insn->id == ARM_INS_ADDW) && insn->detail->arm.operands[1].type == ARM_OP_IMM);
 641 }
 642 // is the instruction SUB* rx, imm
 643 int isSUBx_imm(cs_insn *insn)
 644 {
 645     return (IS_INSN_ID_SUBx(insn->id) && insn->detail->arm.operands[1].type == ARM_OP_IMM);
 646 }
 647 
 648 // is the instruction an ADR or ADR-like instruction?
 649 int isADRx(cs_insn *insn)
 650 {
 651     return ((insn->id == ARM_INS_ADR)
 652         || isSUBW_PC(insn)
 653         || isADDW_PC(insn)
 654         || (isARM(insn) && (isADD_PC(insn) || isSUB_PC(insn))));
 655 }
 656 
 657 // if insn is LDR Rn, [pc,#x] return pointer to value, otherwise null
 658 uint32_t* LDR_PC2valptr_thumb(firmware *fw, cs_insn *insn)
 659 {
 660     if(!isLDR_PC(insn)) {
 661         return NULL;
 662     }
 663     uint32_t adr;
 664     // TODO NOTE doesn't do anything with scale (which can supposedly be neg?),
 665     // appears correct for examples seen so far
 666     adr=(insn->address&~3)+4+insn->detail->arm.operands[1].mem.disp;
 667     return (uint32_t *)adr2ptr(fw,adr);
 668 }
 669 
 670 uint32_t* LDR_PC2valptr_arm(firmware *fw, cs_insn *insn)
 671 {
 672     if(!isLDR_PC(insn)) {
 673         return NULL;
 674     }
 675     uint32_t adr;
 676     // TODO NOTE doesn't do anything with scale (which can supposedly be neg?),
 677     // appears correct for examples seen so far
 678     adr=insn->address+8+insn->detail->arm.operands[1].mem.disp;
 679     return (uint32_t *)adr2ptr(fw,adr);
 680 }
 681 
 682 uint32_t* LDR_PC2valptr(firmware *fw, cs_insn *insn)
 683 {
 684     if(isARM(insn)) {
 685        return LDR_PC2valptr_arm(fw,insn);
 686     } else {
 687        return LDR_PC2valptr_thumb(fw,insn);
 688     }
 689 }
 690 
 691 // return the address of value loaded by LDR rd, [pc, #x] or 0 if not LDR PC
 692 uint32_t LDR_PC2adr(__attribute__ ((unused))firmware *fw, cs_insn *insn)
 693 {
 694     if(!isLDR_PC(insn)) {
 695         return 0;
 696     }
 697     if(isARM(insn)) {
 698        return insn->address+8+insn->detail->arm.operands[1].mem.disp;
 699     } else {
 700        return (insn->address&~3)+4+insn->detail->arm.operands[1].mem.disp;
 701     }
 702 }
 703 
 704 // return value generated by an ADR or ADR-like instruction, or 0 (which should be rarely generated by ADR)
 705 uint32_t ADRx2adr(__attribute__ ((unused))firmware *fw, cs_insn *insn)
 706 {
 707     if(insn->id == ARM_INS_ADR) {
 708         return (insn->address&~3)+4+insn->detail->arm.operands[1].imm;
 709     }
 710     if(isSUBW_PC(insn)) {
 711         return (insn->address&~3)+4-insn->detail->arm.operands[2].imm;
 712     }
 713     if(isADDW_PC(insn)) {
 714         return (insn->address&~3)+4+insn->detail->arm.operands[2].imm;
 715     }
 716     if(isARM(insn)) {
 717         if(isADD_PC(insn)) {
 718             return insn->address+8+insn->detail->arm.operands[2].imm;
 719         }
 720         if(isSUB_PC(insn)) {
 721             return insn->address+8-insn->detail->arm.operands[2].imm;
 722         }
 723     }
 724     return 0;
 725 }
 726 
 727 // return the value generated by an ADR (ie, the location of the value as a firmware address)
 728 // NOTE not checked if it is in dump
 729 uint32_t ADR2adr(__attribute__ ((unused))firmware *fw, cs_insn *insn)
 730 {
 731     if(insn->id != ARM_INS_ADR) {
 732         return 0;
 733     }
 734     // TODO - capstone doesn't appear to generate ADR for ARM
 735     /*
 736     if(cs_insn_group(fw->cs_handle,insn,ARM_GRP_ARM)) {
 737        return 0;
 738     }
 739     */
 740     return (insn->address&~3)+4+insn->detail->arm.operands[1].imm;
 741 }
 742 
 743 // if insn is adr/ AKA ADD Rn, pc,#x return pointer to value, otherwise null
 744 uint32_t* ADR2valptr(firmware *fw, cs_insn *insn)
 745 {
 746     uint32_t adr=ADR2adr(fw,insn);
 747     return (uint32_t *)adr2ptr(fw,adr);
 748 }
 749 
 750 // return value loaded by PC relative LDR instruction, or 0 if out of range
 751 uint32_t LDR_PC2val(firmware *fw, cs_insn *insn)
 752 {
 753     uint32_t *p=LDR_PC2valptr(fw,insn);
 754     if(p) {
 755         return *p;
 756     }
 757     return 0;
 758 }
 759 
 760 // return value loaded by PC relative LDR pc..., or 0 if not matching or out of range
 761 uint32_t LDR_PC_PC_target(firmware *fw, cs_insn *insn)
 762 {
 763     if(!isLDR_PC_PC(insn)) {
 764         return 0;
 765     }
 766     return LDR_PC2val(fw,insn);
 767 }
 768 
 769 // return the target of B instruction, or 0 if current instruction isn't BL
 770 uint32_t B_target(__attribute__ ((unused))firmware *fw, cs_insn *insn)
 771 {
 772     if(insn->id == ARM_INS_B) {
 773         return insn->detail->arm.operands[0].imm;
 774     }
 775     return 0; // TODO could be valid
 776 }
 777 
 778 
 779 // return the target of CBZ / CBNZ instruction, or 0 if current instruction isn't CBx
 780 uint32_t CBx_target(__attribute__ ((unused))firmware *fw, cs_insn *insn)
 781 {
 782     if(insn->id == ARM_INS_CBZ || insn->id == ARM_INS_CBNZ) {
 783         return insn->detail->arm.operands[1].imm;
 784     }
 785     return 0; // TODO could be valid
 786 }
 787 
 788 // return the target of BLX instruction, or 0 if current instruction isn't BLX imm
 789 uint32_t BLXimm_target(__attribute__ ((unused))firmware *fw, cs_insn *insn)
 790 {
 791     if(insn->id == ARM_INS_BLX && insn->detail->arm.operands[0].type == ARM_OP_IMM) {
 792         return insn->detail->arm.operands[0].imm;
 793     }
 794     return 0; // TODO could be valid
 795 }
 796 
 797 
 798 // return the target of BL instruction, or 0 if current instruction isn't BL
 799 uint32_t BL_target(__attribute__ ((unused))firmware *fw, cs_insn *insn)
 800 {
 801     if(insn->id == ARM_INS_BL) {
 802         return insn->detail->arm.operands[0].imm;
 803     }
 804     return 0; // TODO could be valid
 805 }
 806 
 807 // as above, but also including B for tail calls
 808 uint32_t B_BL_target(__attribute__ ((unused))firmware *fw, cs_insn *insn)
 809 {
 810     if(insn->id == ARM_INS_B || insn->id == ARM_INS_BL) {
 811         return insn->detail->arm.operands[0].imm;
 812     }
 813     return 0; // TODO could be valid
 814 }
 815 
 816 //
 817 // as above, but also including BLX imm
 818 uint32_t B_BL_BLXimm_target(__attribute__ ((unused))firmware *fw, cs_insn *insn)
 819 {
 820     if(insn->id == ARM_INS_B
 821         || insn->id == ARM_INS_BL
 822         || (insn->id == ARM_INS_BLX && insn->detail->arm.operands[0].type == ARM_OP_IMM)) {
 823         return insn->detail->arm.operands[0].imm;
 824     }
 825     return 0; // TODO could be valid
 826 }
 827 
 828 // get the (likely) range of jumptable entries from a pc relative TBB or TBH instruction
 829 // returns 0 on error or if instruction is not TBB/TBH
 830 // returns 1 if instruction is TBB/TBH [PC,...]
 831 int get_TBx_PC_info(firmware *fw,iter_state_t *is, tbx_info_t *ti)
 832 {
 833     if(!(is->insn->id == ARM_INS_TBH || is->insn->id == ARM_INS_TBB) || is->insn->detail->arm.operands[0].mem.base != ARM_REG_PC) {
 834         return 0;
 835     }
 836     ti->start=(uint32_t)is->adr; // after current instruction
 837     ti->first_target=0;
 838     ti->bytes=(is->insn->id == ARM_INS_TBH)?2:1;
 839 
 840     uint32_t max_adr;
 841     // max possible (assuming jumptable is contiguous)
 842     if(ti->bytes==1) {
 843         max_adr=ti->start+(2*255);
 844     } else {
 845         max_adr=ti->start+(2*65535);
 846     }
 847     arm_reg i_reg=is->insn->detail->arm.operands[0].mem.index;
 848     // backtrack looking for
 849     // cmp index reg,#imm
 850     // ...
 851     // bhs ...
 852     int max_backtrack = 8;
 853     if(is->ah.count - 1 < max_backtrack) {
 854         max_backtrack = is->ah.count-1;
 855     }
 856 
 857     int max_count=0;
 858     int found_bhs=0;
 859     int i;
 860     for(i=1;i<=max_backtrack;i++) {
 861         fw_disasm_iter_single(fw,adr_hist_get(&is->ah,i)); // thumb state comes from hist
 862         if(fw->is->insn->id == ARM_INS_B && fw->is->insn->detail->arm.cc == ARM_CC_HS) {
 863             found_bhs=1;
 864             continue;
 865         }
 866         // TODO lots of other ways condition code or reg could be changed in between
 867         if(found_bhs && fw->is->insn->id == ARM_INS_CMP) {
 868             // cmp with correct operands, assume number of jumptable entries
 869             if((arm_reg)fw->is->insn->detail->arm.operands[0].reg == i_reg
 870                 || fw->is->insn->detail->arm.operands[1].type == ARM_OP_IMM) {
 871                 max_count = fw->is->insn->detail->arm.operands[1].imm;
 872             }
 873             // otherwise, give up
 874             break;
 875         }
 876     }
 877     if(max_count) {
 878         max_adr = ti->start+max_count*ti->bytes;
 879         //printf("get_TBx_PC_info: max_count %d start 0x%08x max_adr=0x%08x\n",max_count,ti->start,max_adr);
 880     }
 881     uint32_t adr=ti->start;
 882     while(adr < max_adr) {
 883         uint8_t *p=adr2ptr(fw,adr);
 884         if(!p) {
 885             fprintf(stderr,"get_TBx_PC_info: jumptable outside of valid address range at 0x%08x\n",adr);
 886             return 0;
 887         }
 888         uint16_t off;
 889         if(ti->bytes==1) {
 890             off=(uint16_t)*p;
 891         } else {
 892             off=*(uint16_t *)p;
 893         }
 894 
 895         // 0, probably padding at the end (could probably break here)
 896         // note shouldn't be padding on tbh, since aligned for thumb
 897         if(!off) {
 898             break;
 899         }
 900         uint32_t target = ti->start+2*off;
 901         // may indicate non-jumptable entry, if count not found, so don't increment adr
 902         if(target <= adr) {
 903             fprintf(stderr,"get_TBx_PC_info: jumptable target 0x%08x inside jumptable %d at 0x%08x\n",target,off,adr);
 904             break;
 905         }
 906         if(!ti->first_target || target < ti->first_target) {
 907             ti->first_target=target;
 908             if(target < max_adr) {
 909                 max_adr=target; // assume jump table ends at/before first target
 910             }
 911         }
 912         adr+=ti->bytes;
 913     }
 914     // if found count, assume it's right
 915     if(max_count) {
 916         ti->count=max_count;
 917     } else {
 918         // otherwise, use final address
 919         ti->count=(adr-ti->start)/ti->bytes;
 920     }
 921     return 1;
 922 }
 923 
 924 // TODO should have variants of above including LDR pc, [pc, #x] for some of the above
 925 
 926 // ****** disassembly iterator utilities ******
 927 // allocate a new iterator state, optionally initializing at adr (0/invalid OK)
 928 iter_state_t *disasm_iter_new(firmware *fw, uint32_t adr)
 929 {
 930     iter_state_t *is=(iter_state_t *)malloc(sizeof(iter_state_t));
 931     // it doesn't currently appear to matter which handle is used to allocate
 932     // only used for overridable malloc functions and error reporting
 933     is->insn=cs_malloc(fw->cs_handle_arm);
 934     disasm_iter_init(fw,is,adr);
 935     return is;
 936 }
 937 
 938 // free iterator state and associated resources
 939 void disasm_iter_free(iter_state_t *is)
 940 {
 941     cs_free(is->insn,1);
 942     free(is);
 943     return;
 944 }
 945 
 946 // set iterator to adr, without clearing history (for branch following)
 947 // thumb bit in adr sets mode
 948 int disasm_iter_set(firmware *fw, iter_state_t *is, uint32_t adr)
 949 {
 950     // set handle based on thumb bit to allow disassembly
 951     if(ADR_IS_THUMB(adr)) {
 952         is->cs_handle=fw->cs_handle_thumb;
 953         is->thumb=1;
 954         is->insn_min_size=2;
 955         adr=ADR_CLEAR_THUMB(adr);// ADR used for iteration must not contain thumb bit
 956     } else {
 957         is->cs_handle=fw->cs_handle_arm;
 958         is->thumb=0;
 959         is->insn_min_size=4;
 960         if(!ADR_IS_ALIGN4(adr)) {
 961             fprintf(stderr,"disasm_iter_set: unaligned ARM address 0x%08x\n",adr);
 962             is->code=NULL;
 963             is->size=0;
 964             is->adr=0;
 965             return 0;
 966         }
 967     }
 968     uint8_t *p=adr2ptr(fw,adr);
 969     if(!p) {
 970 // TODO invalid currently allowed, for new
 971 //        fprintf(stderr,"disasm_iter_set: bad address 0x%08x\n",adr);
 972         is->code=NULL; // make first iter fail
 973         is->size=0;
 974         is->adr=0;
 975         return 0;
 976     }
 977     // TODO should maybe mark is.insn invalid?
 978     is->code=p;
 979     is->size=fw->size8 - (p-fw->buf8);
 980     is->adr=adr;
 981     return 1;
 982 }
 983 
 984 // initialize iterator state at adr, clearing history
 985 int disasm_iter_init(__attribute__ ((unused))firmware *fw, iter_state_t *is, uint32_t adr)
 986 {
 987     adr_hist_reset(&is->ah);
 988     return disasm_iter_set(fw,is,adr);
 989 }
 990 
 991 // disassemble next instruction, recording address in history
 992 // returns false if state invalid or disassembly fails
 993 // if disassembly fails, is->adr is not incremented
 994 int disasm_iter(__attribute__ ((unused))firmware *fw, iter_state_t *is)
 995 {
 996     // iter_start not called or invalid
 997     if(!is->code) {
 998         return 0;
 999     }
1000     adr_hist_add(&is->ah,(uint32_t)is->adr | is->thumb); // record thumb state to allow backtracking through state changes
1001     return cs_disasm_iter(is->cs_handle, &is->code, &is->size, &is->adr, is->insn);
1002 }
1003 
1004 // re-disassemble the current instruction
1005 // could be useful if turning detail off/on but doesn't seem to help perf much
1006 // NOTE out of date
// Disabled by the #if 0 below, kept for reference only.
// Moves code/adr back and size forward by the size of the last decoded
// instruction, then decodes it again without adding a history entry.
// Returns 0 without decoding if the state has no code pointer or no history.
1007 #if 0
1008 int disasm_iter_redo(firmware *fw,iter_state_t *is) {
1009     if(!is->code || !is->ah.count) {
1010         return 0;
1011     }
1012     is->code -= is->insn->size;
1013     is->adr -= is->insn->size;
1014     is->size += is->insn->size;
1015     // call iter directly, to avoid touching history
1016     return cs_disasm_iter(is->cs_handle, &is->code, &is->size, &is->adr, is->insn);
1017 }
1018 #endif
1019 
1020 // ***** disassembly utilities operating on the default iterator state *****
1021 /*
1022 initialize iter state to begin iterating at adr
1023 history is cleared
1024 */
1025 int fw_disasm_iter_start(firmware *fw, uint32_t adr)
1026 {
1027     return disasm_iter_init(fw,fw->is,adr);
1028 }
1029 
1030 // disassemble the next instruction, updating cached state
1031 int fw_disasm_iter(firmware *fw)
1032 {
1033     return disasm_iter(fw,fw->is);
1034 }
1035 
1036 // disassemble single instruction at given adr, updating cached values
1037 // history is cleared
1038 int fw_disasm_iter_single(firmware *fw, uint32_t adr)
1039 {
1040     fw_disasm_iter_start(fw,adr);
1041     return fw_disasm_iter(fw);
1042 }
1043 
1044 
1045 // ****** standalone disassembly without an iter_state ******
1046 /*
1047 disassemble up to count instructions starting at firmware address adr
1048 allocates and returns insns in insn, can be freed with cs_free(insn, count)
returns the cs_disasm result, or 0 with *insn set to NULL if adr2ptr cannot map adr
disabled by the #if 0 below, kept for reference only
NOTE(review): uses fw->cs_handle, while the live code in this file uses
cs_handle_arm / cs_handle_thumb - presumably stale, confirm before re-enabling
1049 */
1050 #if 0
1051 size_t fw_disasm_adr(firmware *fw, uint32_t adr, unsigned count, cs_insn **insn)
1052 {
1053     uint8_t *p=adr2ptr(fw,adr);
1054     if(!p) {
1055         *insn=NULL; // ?
1056         return 0;
1057     }
1058     return cs_disasm(fw->cs_handle, p, fw->size8 - (p-fw->buf8), adr, count, insn);
1059 }
1060 #endif
1061 
1062 // ***** utilities for searching disassembly over large ranges ******
1063 /*
1064 iterate over firmware disassembling, calling callback described above after each
1065 successful disassembly iteration.  If disassembly fails, the iter state is advanced
1066 minimum instruction size without calling the callback.
1067 starts at address is taken from the iter_state, which should be initialized with
1068 disasm_iter_new(), disasm_iter_init(), or a previous search or iter call.
1069 end defaults to end of ram code or rom code (before init data, if known), based on start
1070 v1 and udata are provided to the callback
1071 */
1072 uint32_t fw_search_insn(firmware *fw, iter_state_t *is, search_insn_fn f, uint32_t v1, void *udata, uint32_t adr_end)
1073 {
1074     uint32_t adr_start=is->adr;
1075     adr_range_t *r_start=adr_get_range(fw,adr_start);
1076     if(!r_start) {
1077         fprintf(stderr,"fw_search_insn: invalid start address 0x%08x\n",adr_start);
1078         return 0;
1079     }
1080 
1081     // default to end of start range
1082     if(!adr_end) {
1083         if(r_start->type == ADR_RANGE_ROM) {
1084             adr_end = fw->rom_code_search_max_adr;
1085         } else {
1086             adr_end=r_start->start + r_start->bytes - is->insn_min_size;
1087         }
1088     }
1089     adr_range_t *r_end=adr_get_range(fw,adr_end);
1090 
1091     if(!r_end) {
1092         fprintf(stderr,"fw_search_insn: invalid end address 0x%08x\n",adr_end);
1093         return 0;
1094     }
1095     // ignore thumb bit on end adr
1096     adr_end=ADR_CLEAR_THUMB(adr_end);
1097 
1098     if((r_start != r_end) || (adr_end < adr_start)) {
1099         fprintf(stderr,"fw_search_insn: invalid address range 0x%08x 0x%08x\n",adr_start,adr_end);
1100         return 0;
1101     }
1102 
1103     uint32_t adr=adr_start;
1104     // don't bother with buf ranges for RAM code
1105     if(r_start->type != ADR_RANGE_ROM) {
1106         while(adr < adr_end) {
1107             if(disasm_iter(fw,is)) {
1108                 uint32_t r=f(fw,is,v1,udata);
1109                 if(r) {
1110                     return r;
1111                 }
1112                 adr=(uint32_t)is->adr; // adr was updated by iter or called sub
1113             } else {
1114                 // disassembly failed
1115                 // increment by minimum instruction size and re-init
1116                 adr=adr+is->insn_min_size;
1117                 if(!disasm_iter_init(fw,is,adr|is->thumb)) {
1118                     fprintf(stderr,"fw_search_insn: disasm_iter_init failed\n");
1119                     return 0;
1120                 }
1121              }
1122         }
1123         return 0;
1124     }
1125     BufRange *br=fw->br;
1126     // TODO might want to (optionally?) turn off details? For now, caller can set, doesn't seem to help perf much
1127     // TODO when searching ROM, could skip over RAM copied areas (currently just limit default range)
1128     while(br && adr < adr_end) {
1129         uint32_t *p_adr=(uint32_t *)adr2ptr(fw,(uint32_t)adr);
1130         uint32_t *br_end = br->p + br->len;
1131         uint32_t adr_chunk_end = ptr2adr(fw,(uint8_t*)br_end);
1132         if(adr_end < adr_chunk_end) {
1133             adr_chunk_end = adr_end;
1134         }
1135         // address is before start of current range, adjust
1136         if(p_adr < br->p) {
1137             adr=ptr2adr(fw,(uint8_t *)br->p);
1138             if(!disasm_iter_init(fw,is,(uint32_t)adr)) {
1139                 return 0;
1140             }
1141             p_adr=(uint32_t *)adr2ptr(fw,(uint32_t)adr);
1142         }
1143         //printf("br:0x%08x-0x%08x\n",ptr2adr(fw,(uint8_t *)br->p),ptr2adr(fw,(uint8_t *)(br->p+br->len)));
1144         while(adr < adr_chunk_end) {
1145             if(disasm_iter(fw,is)) {
1146                 uint32_t r=f(fw,is,v1,udata);
1147                 if(r) {
1148                     return r;
1149                 }
1150                 adr=(uint32_t)is->adr; // adr was updated by iter or called sub
1151             } else {
1152                 // disassembly failed. cs_disarm_iter does not update address
1153                 // increment by half word and re-init
1154                 adr=adr+is->insn_min_size;
1155                 if(!disasm_iter_init(fw,is,adr|is->thumb)) {
1156                     fprintf(stderr,"fw_search_insn: disasm_iter_init failed\n");
1157                     return 0;
1158                 }
1159              }
1160         }
1161         // next range
1162         br=br->next;
1163     }
1164     return 0;
1165 }
1166 
1167 // ****** callbacks for use with fw_search_insn ******
1168 
1169 // search for constant references
1170 uint32_t search_disasm_const_ref(firmware *fw, iter_state_t *is, uint32_t val, __attribute__ ((unused))void *unused)
1171 {
1172 //    printf("%"PRIx64" %s %s\n",is->insn->address,is->insn->mnemonic, is->insn->op_str);
1173     uint32_t av=ADRx2adr(fw,is->insn);
1174     if(av) {
1175 //        printf("adr 0x%08x\n",av);
1176         if(av == val) {
1177             return (uint32_t)is->insn->address;
1178         }
1179         return 0;
1180     }
1181     uint32_t *pv=LDR_PC2valptr(fw,is->insn);
1182     if(pv) {
1183 //        printf("ldr 0x%08x\n",*pv);
1184         if(*pv == val) {
1185             return (uint32_t)is->insn->address;
1186         }
1187     }
1188     return 0;
1189 }
1190 
1191 // search for string ref
1192 uint32_t search_disasm_str_ref(firmware *fw, iter_state_t *is, __attribute__ ((unused))uint32_t val, void *udata)
1193 {
1194     const char *str=(const char *)udata;
1195 //    printf("%"PRIx64" %s %s\n",is->insn->address,is->insn->mnemonic, is->insn->op_str);
1196     uint32_t av=ADRx2adr(fw,is->insn);
1197     if(av) {
1198 //        printf("adr 0x%08x\n",av);
1199         char *cmp=(char *)adr2ptr_with_data(fw,av);
1200         if(cmp && (strcmp(cmp,str) == 0)) {
1201             return (uint32_t)is->insn->address;
1202         }
1203         return 0;
1204     }
1205     uint32_t *pv=LDR_PC2valptr(fw,is->insn);
1206     if(pv) {
1207 //        printf("ldr 0x%08x\n",*pv);
1208         char *cmp=(char *)adr2ptr_with_data(fw,*pv);
1209         if(cmp && (strcmp(cmp,str) == 0)) {
1210             return (uint32_t)is->insn->address;
1211         }
1212     }
1213     return 0;
1214 }
1215 
1216 // search for calls/jumps to immediate addresses
1217 // thumb bit in address should be set appropriately
1218 // returns 1 if found, address can be obtained from insn
1219 uint32_t search_disasm_calls(firmware *fw, iter_state_t *is, uint32_t val, __attribute__ ((unused))void *unused)
1220 {
1221     //printf("%"PRIx64" %s %s\n",is->insn->address,is->insn->mnemonic, is->insn->op_str);
1222     uint32_t sub=get_branch_call_insn_target(fw,is);
1223     if(sub) {
1224         if(sub == val) {
1225             return 1;
1226         }
1227     }
1228     return 0;
1229 }
1230 
1231 // a search_calls_multi_fn that just returns 1
1232 int search_calls_multi_end(__attribute__ ((unused))firmware *fw, __attribute__ ((unused))iter_state_t *is, __attribute__ ((unused))uint32_t adr) {
1233     return 1;
1234 }
1235 
1236 
1237 // Search for calls to multiple functions (more efficient than multiple passes)
1238 // if adr is found in null terminated search_calls_multi_data array, returns fn return value
1239 // otherwise 0
1240 uint32_t search_disasm_calls_multi(firmware *fw, iter_state_t *is, __attribute__ ((unused))uint32_t unused, void *userdata)
1241 {
1242     search_calls_multi_data_t *data=(search_calls_multi_data_t *)userdata;
1243     uint32_t sub=get_branch_call_insn_target(fw,is);
1244     if(sub) {
1245         while(data->adr) {
1246             if(data->adr == sub) {
1247                 return data->fn(fw,is,sub);
1248             }
1249             data++;
1250         }
1251     }
1252     return 0;
1253 }
1254 
1255 // as above, but check for single level of veneer
1256 uint32_t search_disasm_calls_veneer_multi(firmware *fw, iter_state_t *is, __attribute__ ((unused))uint32_t unused, void *userdata)
1257 {
1258     search_calls_multi_data_t *data=(search_calls_multi_data_t *)userdata;
1259     uint32_t sub=get_branch_call_insn_target(fw,is);
1260     if(sub) {
1261         while(data->adr) {
1262             if(data->adr == sub) {
1263                 return data->fn(fw,is,sub);
1264             }
1265             data++;
1266         }
1267         uint32_t veneer=0;
1268         fw_disasm_iter_single(fw,sub);
1269         veneer=get_branch_call_insn_target(fw,fw->is);
1270         data=(search_calls_multi_data_t *)userdata;
1271         while(data->adr) {
1272             if(data->adr == veneer) {
1273                 return data->fn(fw,is,sub);
1274             }
1275             data++;
1276         }
1277     }
1278     return 0;
1279 }
1280 
1281 // ****** utilities for extracting register values ******
1282 /*
1283 backtrack through is_init state history picking up constants loaded into r0-r3
1284 return bitmask of regs with values found
1285 affects fw->is, does not affect is_init
1286 
1287 NOTE values may be inaccurate for many reasons, doesn't track all reg affecting ops,
1288 doesn't account for branches landing in the middle of inspected code
1289 doesn't account for many conditional cases
1290 */
1291 int get_call_const_args(firmware *fw, iter_state_t *is_init, int max_backtrack, uint32_t *res)
1292 {
1293     int i;
1294     /*
1295     static int dbg_count=0;
1296     if(is_init->insn->address==...) {
1297         dbg_count=1;
1298     } else {
1299         dbg_count=0;
1300     }
1301     */
1302 
1303     // init regs to zero (to support adds etc)
1304     for (i=0;i<4;i++) {
1305         res[i]=0;
1306     }
1307 
1308     // count includes current instruction (i.e. BL of call)
1309     if(is_init->ah.count <= 1) {
1310         return 0;
1311     }
1312     if(is_init->ah.count - 1 < max_backtrack) {
1313         /*
1314         if(dbg_count > 0) {
1315             printf("max_backtrack %d hist count %d\n",max_backtrack,is_init->ah.count);
1316         }
1317         */
1318         max_backtrack = is_init->ah.count-1;
1319     }
1320     uint32_t found_bits=0; // registers with known const values
1321     uint32_t known_bits=0; // registers with some value
1322 
1323     for(i=1;i<=max_backtrack && known_bits !=0xf;i++) {
1324         // TODO going backwards and calling start each time inefficient
1325         // forward could also find multi-instruction constants in some cases (e.g mov + add, movw + movt)
1326         fw_disasm_iter_single(fw,adr_hist_get(&is_init->ah,i)); // thumb state comes from hist
1327         /*
1328         if(dbg_count > 0) {
1329             printf("backtrack %d:%d  ",dbg_count,i);
1330             printf("%"PRIx64" %s %s\n",fw->is->insn->address,fw->is->insn->mnemonic, fw->is->insn->op_str);
1331         }
1332         */
1333         arm_insn insn_id = fw->is->insn->id;
1334         // BL, BLX etc will trash r0-r3
1335         // only break on unconditional - optimistic, could produce incorrect results
1336         if((insn_id == ARM_INS_BL || insn_id == ARM_INS_BLX
1337             // B/BX could mean execution goes somewhere totally different, but in practice it often just skipping over a word of data...
1338              /*|| insn_id == ARM_INS_B || insn_id == ARM_INS_BX*/)
1339              && fw->is->insn->detail->arm.cc == ARM_CC_AL) {
1340             break;
1341         }
1342 
1343         // if the first op isn't REG, continue
1344         // TODO lots of instructions could affect reg even if not first op
1345         if(fw->is->insn->detail->arm.operands[0].type != ARM_OP_REG) {
1346             continue;
1347         }
1348         arm_reg rd = fw->is->insn->detail->arm.operands[0].reg;
1349         // capstone arm.h regs enum R0-R12 are ordered
1350         // enum has entries before R0
1351         if(rd < ARM_REG_R0 || rd > ARM_REG_R3) {
1352             continue;
1353         }
1354 
1355         int rd_i = rd - ARM_REG_R0;
1356         uint32_t rd_bit = 1 << rd_i;
1357         // if we don't already have something for this reg
1358         if(!(known_bits & rd_bit)) {
1359             // know something has been done to this reg
1360             // note doesn't account for conditionals
1361             known_bits |=rd_bit;
1362             // is it an LDR
1363             uint32_t *pv=LDR_PC2valptr(fw,fw->is->insn);
1364             if(pv) {
1365                 res[rd_i] += *pv;
1366 //                if(dbg_count) printf("found ldr r%d,=0x%08x\n",rd_i,res[rd_i]);
1367                 found_bits |=rd_bit;
1368                 continue;
1369             }
1370             uint32_t v=ADRx2adr(fw,fw->is->insn); // assumes ADR doesn't generate 0, probably safe
1371             if(v) {
1372                 res[rd_i] += v;
1373 //                 if(dbg_count) printf("found adrx r%d,0x%08x\n",rd_i,res[rd_i]);
1374                 found_bits |=rd_bit;
1375                 continue;
1376             }
1377             // immediate MOV note MOVT combinations, not accounted for, some handled ADDs below
1378             if( IS_INSN_ID_MOVx(insn_id)
1379                 && fw->is->insn->detail->arm.operands[1].type == ARM_OP_IMM) {
1380                 res[rd_i] += fw->is->insn->detail->arm.operands[1].imm;
1381 //                if(dbg_count) printf("found move r%d,#0x%08x\n",rd_i,res[rd_i]);
1382                 found_bits |=rd_bit;
1383             } else if(isADDx_imm(fw->is->insn)) {
1384                 res[rd_i] += fw->is->insn->detail->arm.operands[1].imm;
1385 //                if(dbg_count) printf("found add r%d,#0x%08x\n",rd_i,res[rd_i]);
1386                 // pretend reg is not known
1387                 known_bits ^=rd_bit;
1388                 // do not set found bit here
1389             } else if(isSUBx_imm(fw->is->insn)) {
1390                 res[rd_i] = (int)(res[rd_i]) - fw->is->insn->detail->arm.operands[1].imm;
1391 //                if(dbg_count) printf("found add r%d,#0x%08x\n",rd_i,res[rd_i]);
1392                 // pretend reg is not known
1393                 known_bits ^=rd_bit;
1394                 // do not set found bit here
1395             }/* else {
1396             }
1397             */
1398         }
1399     }
1400 //    if(dbg_count) printf("get_call_const_args found 0x%08x\n",found_bits);
1401     return found_bits;
1402 }
1403 
1404 /*
1405 starting from is_init, look for a direct jump, such as
1406  B <target>
1407  LDR PC, [pc, #x]
1408  movw ip, #x
1409  movt ip, #x
1410  bx ip
1411 if found, return target address with thumb bit set appropriately
1412 NOTE does not check for conditional
1413 uses fw->is
1414 does not check CBx, since it would generally be part of a function not a veneer
1415 */
1416 uint32_t get_direct_jump_target(firmware *fw, iter_state_t *is_init)
1417 {
1418     uint32_t adr=B_target(fw,is_init->insn);
1419     // B ... return with thumb set to current mode
1420     if(adr) {
1421         return (adr | is_init->thumb);
1422     }
1423     adr=LDR_PC_PC_target(fw,is_init->insn);
1424     // LDR pc #... thumb is set in the loaded address
1425     if(adr) {
1426         return adr;
1427     }
1428     // an immediate move to ip (R12), candidate for multi-instruction veneer
1429     if((is_init->insn->id == ARM_INS_MOV || is_init->insn->id == ARM_INS_MOVW)
1430         && is_init->insn->detail->arm.operands[0].reg == ARM_REG_IP
1431         && is_init->insn->detail->arm.operands[1].type == ARM_OP_IMM) {
1432         adr = is_init->insn->detail->arm.operands[1].imm;
1433         // iter in default state, starting from is_init
1434         if(!fw_disasm_iter_single(fw,is_init->adr | is_init->thumb)) {
1435             fprintf(stderr,"get_direct_jump_target: disasm single failed at 0x%"PRIx64"\n",fw->is->insn->address);
1436             return 0;
1437         }
1438         // check for MOVT ip, #x
1439         if(!(fw->is->insn->id == ARM_INS_MOVT
1440             && fw->is->insn->detail->arm.operands[0].reg == ARM_REG_IP
1441             && fw->is->insn->detail->arm.operands[1].type == ARM_OP_IMM)) {
1442 // doesn't match second two insn veneer, not really an arror
1443 //            fprintf(stderr,"get_direct_jump_target: not 2 insn ip veneer 0x%"PRIx64"\n",fw->is->insn->address);
1444             return 0;
1445         }
1446         // thumb set in loaded adr
1447         adr = (fw->is->insn->detail->arm.operands[1].imm << 16) | (adr&0xFFFF);
1448         if(!fw_disasm_iter(fw)) {
1449             fprintf(stderr,"get_direct_jump_target: disasm 2 failed at 0x%"PRIx64"\n",fw->is->insn->address);
1450             return 0;
1451         }
1452         // BX ip ?
1453         if(fw->is->insn->id == ARM_INS_BX
1454             && fw->is->insn->detail->arm.operands[0].type == ARM_OP_REG
1455             && fw->is->insn->detail->arm.operands[0].reg == ARM_REG_IP) {
1456             return adr;
1457         }
1458     }
1459     return 0;
1460 }
1461 
1462 /*
1463 return target of any single instruction branch or function call instruction,
1464 with thumb bit set appropriately
1465 returns 0 if current instruction not branch/call
1466 */
1467 uint32_t get_branch_call_insn_target(firmware *fw, iter_state_t *is)
1468 {
1469     uint32_t adr=B_BL_target(fw,is->insn);
1470     if(adr) {
1471         return (adr | is->thumb);
1472     }
1473     // CBx only exists in thumb
1474     if(is->thumb) {
1475         adr=CBx_target(fw,is->insn);
1476         if(adr) {
1477             return ADR_SET_THUMB(adr);
1478         }
1479     }
1480 
1481     adr=BLXimm_target(fw,is->insn);
1482     if(adr) {
1483         if(is->thumb) {
1484             return adr;
1485         } else {
1486             return adr | is->thumb;
1487         }
1488     }
1489 
1490     adr=LDR_PC_PC_target(fw,is->insn);
1491     if(adr) {
1492         return adr;
1493     }
1494     return 0;
1495 }
1496 
1497 /*
1498 search up to max_search_ins for first LDR, =value
1499 and then match up to max_seq_insns for a sequence like
1500 LDR Rbase,=adr
1501 ... possible intervening ins
1502 SUB Rbase,#adj // optional, may be any add/sub variant
1503 ... possible intervening ins
1504 LDR Rval,[Rbase + #off]
1505 
1506 returns 1 if found, 0 if not
1507 stores registers and constants in *result if successful
1508 
1509 NOTE bad values are possible with intervening ins, short sequences recommended
1510 
1511 TODO similar code for STR would be useful, but in many cases would have to handle load or move into reg_val
1512 */
1513 int find_and_get_var_ldr(firmware *fw,
1514                             iter_state_t *is,
1515                             int max_search_insns,
1516                             int max_seq_insns,
1517                             arm_reg match_val_reg, // ARM_REG_INVALID for any
1518                             var_ldr_desc_t *result)
1519 
1520 {
1521     if(!insn_match_find_next(fw,is,max_search_insns,match_ldr_pc)) {
1522         // printf("find_and_get_var_ldr: LDR PC not found\n");
1523         return 0;
1524     }
1525     var_ldr_desc_t r;
1526     memset(&r,0,sizeof(r));
1527     r.reg_base=is->insn->detail->arm.operands[0].reg;
1528     r.adr_base=LDR_PC2val(fw,is->insn);
1529     int seq_count=1;
1530 
1531     while(seq_count < max_seq_insns) {
1532         // disassembly failed, no match (could ignore..)
1533         if(!disasm_iter(fw,is)) {
1534             return 0;
1535         }
1536         // assume first encountered LDR x,[pc] is the one to use
1537         // give up if we encounter another. Don't know beforehand which reg is base
1538         // NOTE: backward search would allow matching base that eventually ends up in desired reg
1539         if(isLDR_PC(is->insn)) {
1540             // printf("find_and_get_var_ldr: second ldr pc\n");
1541             return  0;
1542         }
1543         seq_count++;
1544         // firmware may use add/sub to get actual firmware base address
1545         if(isADDx_imm(is->insn) || isSUBx_imm(is->insn)) {
1546             if((arm_reg)is->insn->detail->arm.operands[0].reg != r.reg_base) {
1547                 continue;
1548             }
1549             if(isADDx_imm(is->insn)) {
1550                 r.adj=is->insn->detail->arm.operands[1].imm;
1551             } else {
1552                 r.adj=-is->insn->detail->arm.operands[1].imm;
1553             }
1554             if(!disasm_iter(fw,is)) {
1555                 return 0;
1556             }
1557             seq_count++;
1558         } else {
1559             r.adj=0;
1560         }
1561         // try to bail out if base reg trashed
1562         // BL, BLX etc will trash r0-r3, B, BX go somewhere else
1563         // only break on unconditional - optimistic, could produce incorrect results
1564         // can't account for branches into searched code
1565         if((r.reg_base >= ARM_REG_R0 && r.reg_base <= ARM_REG_R3)
1566                 && (is->insn->id == ARM_INS_BL || is->insn->id == ARM_INS_BLX
1567                     || is->insn->id == ARM_INS_B || is->insn->id == ARM_INS_BX)
1568                 && is->insn->detail->arm.cc == ARM_CC_AL) {
1569             // printf("find_and_get_var_ldr: bail B*\n");
1570             return 0;
1571         }
1572         if(is->insn->id != ARM_INS_LDR || (arm_reg)is->insn->detail->arm.operands[1].reg != r.reg_base) {
1573             // other operation on with base reg as first operand, give up
1574             // simplistic, many other things could affect reg
1575             if(is->insn->detail->arm.operands[0].type == ARM_OP_REG && (arm_reg)is->insn->detail->arm.operands[0].reg == r.reg_base) {
1576                 // printf("find_and_get_var_ldr: bail mod base\n");
1577                 return 0;
1578             }
1579             continue;
1580         }
1581         r.reg_val = is->insn->detail->arm.operands[0].reg;
1582         if(match_val_reg != ARM_REG_INVALID && (r.reg_val != match_val_reg)) {
1583             continue;
1584         }
1585         r.off = is->insn->detail->arm.operands[1].mem.disp;
1586         r.adr_adj = r.adr_base + r.adj;
1587         r.adr_final = r.adr_adj + r.off;
1588         memcpy(result,&r,sizeof(r));
1589         return 1;
1590     }
1591     return 0;
1592 }
1593 
1594 /*
1595 check for, and optionally return information about
1596 functions with return values that can be completely determined
1597 from disassembly
1598 uses fw->is
1599 */
1600 // constants below may  as flags on input, and as return valaue
1601 // no simple function found
1602 #define MATCH_SIMPLE_FUNC_NONE    0x0
1603 // immediately returns, with no value
1604 #define MATCH_SIMPLE_FUNC_NULLSUB 0x1
1605 // immediately returns with a MOV constant
1606 #define MATCH_SIMPLE_FUNC_IMM     0x2
1607 // TODO LDR pc, =const,  ADR
1608 // TODO could also do pointer derefs and return pointer info without val
1609 #define MATCH_SIMPLE_FUNC_ANY     0x3
1610 int check_simple_func(firmware *fw, uint32_t adr, int match_ftype, simple_func_desc_t *info)
1611 {
1612     const insn_match_t match_mov_r0_imm[]={
1613         {MATCH_INS(MOV,   2),  {MATCH_OP_REG(R0),  MATCH_OP_IMM_ANY}},
1614 #if CS_API_MAJOR < 4
1615         {MATCH_INS(MOVS,  2),  {MATCH_OP_REG(R0),  MATCH_OP_IMM_ANY}},
1616 #endif
1617         {ARM_INS_ENDING}
1618     };
1619 
1620     int found = 0;
1621     int found_val = 0;
1622     if(info) {
1623         info->ftype = MATCH_SIMPLE_FUNC_NONE;
1624         info->retval = 0;
1625     }
1626     if(!fw_disasm_iter_single(fw,adr)) {
1627         //fprintf(stderr,"check_simple_func: disasm_iter_single failed 0x%x\n",adr);
1628         return 0;
1629     }
1630     if(match_ftype & MATCH_SIMPLE_FUNC_IMM) {
1631         // check mov r0, #imm
1632         if(insn_match_any(fw->is->insn,match_mov_r0_imm)) {
1633             found_val = fw->is->insn->detail->arm.operands[1].imm;
1634             found = MATCH_SIMPLE_FUNC_IMM;
1635             // fprintf(stderr,"check_simple_func: found IMM\n");
1636             if(!fw_disasm_iter(fw)) {
1637                 //fprintf(stderr,"check_simple_func: disasm_iter failed 0x%x\n",adr);
1638                 return 0;
1639             }
1640         }
1641     }
1642     if(!isRETx(fw->is->insn)) {
1643         // fprintf(stderr,"check_simple_func: no ret\n");
1644         return 0;
1645     }
1646     // no previous found, check if ret alone
1647     if(!found && (match_ftype & MATCH_SIMPLE_FUNC_NULLSUB)) {
1648         found = MATCH_SIMPLE_FUNC_NULLSUB;
1649         // fprintf(stderr,"check_simple_func: found nullsub\n");
1650     }
1651     if(found) {
1652         if(info) {
1653             info->ftype = found;
1654             info->retval = found_val;
1655         }
1656     }
1657     return found;
1658 }
1659 
1660 /*
1661 advance is trying to find the last function called by a function
1662 function assumed to push lr, pop lr or pc (many small functions don't!)
1663 either the last bl/blximm before pop {... pc}
1664 or b after pop {... lr}
1665 after min_insns up to max_insns
1666 */
1667 uint32_t find_last_call_from_func(firmware *fw, iter_state_t *is,int min_insns, int max_insns)
1668 {
1669     int push_found=0;
1670     uint32_t last_adr=0;
1671     int count;
1672     for(count=0; count < max_insns; count++) {
1673         if(!disasm_iter(fw,is)) {
1674             fprintf(stderr,"find_last_call_from_func: disasm failed 0x%"PRIx64"\n",is->adr);
1675             return 0;
1676         }
1677         // TODO could match push regs with pop
1678         if(isPUSH_LR(is->insn)) {
1679             // already found a PUSH LR, probably in new function
1680             if(push_found) {
1681                 //printf("find_last_call_from_func: second push pc 0x%"PRIx64"\n",is->adr);
1682                 return 0;
1683             }
1684             push_found=1;
1685             continue;
1686         }
1687         // ignore everything before push (could be some mov/ldr, shoudln't be any calls)
1688         // TODO may want to allow starting in the middle of a function
1689         if(!push_found) {
1690             continue;
1691         }
1692         // found a potential call, store
1693         if(insn_match_any(is->insn,match_bl_blximm) && count >= min_insns) {
1694             //printf("find_last_call_from_func: found call 0x%"PRIx64"\n",is->adr);
1695             last_adr=get_branch_call_insn_target(fw,is);
1696             continue;
1697         }
1698         // found pop PC, can only be stored call if present
1699         if(isPOP_PC(is->insn)) {
1700             // printf("find_last_call_from_func: found pop PC 0x%"PRIx64"\n",is->adr);
1701             if(last_adr) {
1702                 return last_adr;
1703             }
1704             // no call found, or not found within min
1705             return 0;
1706         }
1707         // found pop LR, check if next is unconditional B
1708         if(isPOP_LR(is->insn)) {
1709             // hit func end with less than min, no match
1710             if(count < min_insns) {
1711                 // printf("find_last_call_from_func: pop before min 0x%"PRIx64"\n",is->adr);
1712                 return 0;
1713             }
1714             if(!disasm_iter(fw,is)) {
1715                 fprintf(stderr,"find_last_call_from_func: disasm failed 0x%"PRIx64"\n",is->adr);
1716                 return 0;
1717             }
1718             if(is->insn->id == ARM_INS_B && is->insn->detail->arm.cc == ARM_CC_AL) {
1719                 return get_branch_call_insn_target(fw,is);
1720             }
1721             // doen't go more than one insn after pop (could be more, but uncommon)
1722             // printf("find_last_call_from_func: more than one insn after pop 0x%"PRIx64"\n",is->adr);
1723             return 0;
1724         }
1725         // found another kind of ret, give up
1726         if(isRETx(is->insn)) {
1727             // printf("find_last_call_from_func: other ret 0x%"PRIx64"\n",is->adr);
1728             return 0;
1729         }
1730     }
1731     // printf("find_last_call_from_func: no match in range 0x%"PRIx64"\n",is->adr);
1732     return 0;
1733 }
1734 
1735 // ****** utilities for matching instructions and instruction sequences ******
1736 
1737 // some common matches for insn_match_find_next
1738 const insn_match_t match_b[]={
1739     {MATCH_INS(B,   MATCH_OPCOUNT_IGNORE)},
1740     {ARM_INS_ENDING}
1741 };
1742 const insn_match_t match_bl[]={
1743     {MATCH_INS(BL,  MATCH_OPCOUNT_IGNORE)},
1744     {ARM_INS_ENDING}
1745 };
1746 const insn_match_t match_b_bl[]={
1747     {MATCH_INS(B,   MATCH_OPCOUNT_IGNORE)},
1748     {MATCH_INS(BL,  MATCH_OPCOUNT_IGNORE)},
1749     {ARM_INS_ENDING}
1750 };
1751 
1752 const insn_match_t match_b_bl_blximm[]={
1753     {MATCH_INS(B,   MATCH_OPCOUNT_IGNORE)},
1754     {MATCH_INS(BL,  MATCH_OPCOUNT_IGNORE)},
1755     {MATCH_INS(BLX, 1), {MATCH_OP_IMM_ANY}},
1756     {ARM_INS_ENDING}
1757 };
1758 
1759 const insn_match_t match_bl_blximm[]={
1760     {MATCH_INS(BL,  MATCH_OPCOUNT_IGNORE)},
1761     {MATCH_INS(BLX, 1), {MATCH_OP_IMM_ANY}},
1762     {ARM_INS_ENDING}
1763 };
1764 
1765 const insn_match_t match_bxlr[]={
1766     {MATCH_INS(BX, 1), {MATCH_OP_REG(LR)}},
1767     {ARM_INS_ENDING}
1768 };
1769 
1770 const insn_match_t match_ldr_pc[]={
1771     {MATCH_INS(LDR, 2), {MATCH_OP_REG_ANY,  MATCH_OP_MEM_BASE(PC)}},
1772     {ARM_INS_ENDING}
1773 };
1774 
1775 // iterate as long as sequence of instructions matches sequence defined in match
1776 int insn_match_seq(firmware *fw, iter_state_t *is, const insn_match_t *match)
1777 {
1778     //printf("%"PRIx64" insn_match_seq %s %s\n",is->insn->address,is->insn->mnemonic,is->insn->op_str);
1779     while(match->id != ARM_INS_ENDING && disasm_iter(fw,is) && insn_match(is->insn,match)) {
1780         //printf("%"PRIx64" insn_match_seq next %s %s\n",is->insn->address,is->insn->mnemonic,is->insn->op_str);
1781         match++;
1782     }
1783     return (match->id == ARM_INS_ENDING);
1784 }
1785 
1786 // check if single insn matches values defined by match
1787 int insn_match(cs_insn *insn,const insn_match_t *match)
1788 {
1789     // specific instruction ID requested, check
1790     if(match->id != ARM_INS_INVALID && insn->id != match->id) {
1791         return 0;
1792     }
1793     // condition code requested, check
1794     if(match->cc != ARM_CC_INVALID && insn->detail->arm.cc != match->cc) {
1795         return 0;
1796     }
1797     // no op checks, done
1798     if(match->op_count == MATCH_OPCOUNT_IGNORE) {
1799         return 1;
1800     }
1801     // operand count requested, check
1802     if(match->op_count >= 0 && insn->detail->arm.op_count != match->op_count) {
1803         return 0;
1804     }
1805     int i;
1806     // operands
1807     for(i=0;i<MATCH_MAX_OPS && i < insn->detail->arm.op_count; i++) {
1808         // specific type requested?
1809         if(match->operands[i].type != ARM_OP_INVALID && insn->detail->arm.operands[i].type != match->operands[i].type) {
1810             return 0;
1811         }
1812         // specific registers requested?
1813         if(match->operands[i].reg1 != ARM_REG_INVALID) {
1814             if(insn->detail->arm.operands[i].type == ARM_OP_REG) {
1815                 if((arm_reg)insn->detail->arm.operands[i].reg != match->operands[i].reg1) {
1816                     return 0;
1817                 }
1818             } else if(insn->detail->arm.operands[i].type == ARM_OP_MEM) {
1819                 if(insn->detail->arm.operands[i].mem.base != match->operands[i].reg1) {
1820                     return 0;
1821                 }
1822             } else {
1823                 fprintf(stderr,"insn_match: reg1 match requested on operand not reg or mem %d\n",
1824                         insn->detail->arm.operands[i].type);
1825             }
1826         }
1827         if(match->operands[i].reg2 != ARM_REG_INVALID) {
1828             if(insn->detail->arm.operands[i].type == ARM_OP_MEM) {
1829                 if(insn->detail->arm.operands[i].mem.index != match->operands[i].reg2) {
1830                     return 0;
1831                 }
1832             } else {
1833                 fprintf(stderr,"insn_match: reg2 match requested on operand not reg or mem %d\n",
1834                         insn->detail->arm.operands[i].type);
1835             }
1836         }
1837         if(match->operands[i].flags & MATCH_OP_FL_IMM) {
1838             if(insn->detail->arm.operands[i].type == ARM_OP_IMM
1839                     || insn->detail->arm.operands[i].type == ARM_OP_PIMM
1840                     || insn->detail->arm.operands[i].type == ARM_OP_CIMM) {
1841                 if(insn->detail->arm.operands[i].imm != match->operands[i].imm) {
1842                     return  0;
1843                 }
1844             } else if(insn->detail->arm.operands[i].type == ARM_OP_MEM) {
1845                 if(insn->detail->arm.operands[i].mem.disp != match->operands[i].imm) {
1846                     return  0;
1847                 }
1848             } else {
1849                 fprintf(stderr,"insn_match: imm match requested on operand not imm or mem %d\n",
1850                         insn->detail->arm.operands[i].type);
1851             }
1852         }
1853         if(match->operands[i].flags & MATCH_OP_FL_LAST) {
1854             break;
1855         }
1856     }
1857     return 1;
1858 }
1859 
1860 // check if single insn matches any of the provided matches
1861 int insn_match_any(cs_insn *insn,const insn_match_t *match)
1862 {
1863     const insn_match_t *m;
1864     // check matches
1865     for(m=match;m->id != ARM_INS_ENDING;m++) {
1866         if(insn_match(insn,m)) {
1867             return 1;
1868         }
1869     }
1870     return 0;
1871 }
1872 
1873 // iterate is until current instruction matches any of the provided matches or until limit reached
1874 int insn_match_find_next(firmware *fw, iter_state_t *is, int max_insns, const insn_match_t *match)
1875 {
1876     int i=0;
1877     while(i < max_insns) {
1878         // disassembly failed, no match (could ignore..)
1879         if(!disasm_iter(fw,is)) {
1880             return 0;
1881         }
1882         // printf("%"PRIx64" insn_match_find_next %s %s\n",is->insn->address,is->insn->mnemonic,is->insn->op_str);
1883         if(insn_match_any(is->insn,match)) {
1884             return 1;
1885         }
1886         i++;
1887     }
1888     // limit hit
1889     return 0;
1890 }
1891 
1892 // iterate is until current has matched any of the provided matches N times or until max_insns reached
1893 int insn_match_find_nth(firmware *fw, iter_state_t *is, int max_insns, int num_to_match, const insn_match_t *match)
1894 {
1895     int i=0;
1896     int num_matched=0;
1897     while(i < max_insns) {
1898         // disassembly failed, no match (could ignore..)
1899         if(!disasm_iter(fw,is)) {
1900             return 0;
1901         }
1902         // printf("%"PRIx64" insn_match_find_next %s %s\n",is->insn->address,is->insn->mnemonic,is->insn->op_str);
1903 
1904         const insn_match_t *m;
1905         // check matches
1906         for(m=match;m->id != ARM_INS_ENDING;m++) {
1907             if(insn_match(is->insn,m)) {
1908                 num_matched++;
1909             }
1910         }
1911         if(num_matched == num_to_match) {
1912             return 1;
1913         }
1914         i++;
1915     }
1916     // limit hit
1917     return 0;
1918 }
1919 
1920 // find next matching sequence starting within max_insns
1921 int insn_match_find_next_seq(firmware *fw, iter_state_t *is, int max_insns, const insn_match_t *match)
1922 {
1923     int count=0;
1924     while(count < max_insns) {
1925         const insn_match_t *m=match;
1926         //printf("%"PRIx64" insn_match_find_next_seq %s %s\n",is->insn->address,is->insn->mnemonic,is->insn->op_str);
1927         while(m->id != ARM_INS_ENDING && disasm_iter(fw,is) && insn_match(is->insn,m)) {
1928             m++;
1929             count++;
1930         }
1931         if(m->id == ARM_INS_ENDING) {
1932             return 1;
1933         }
1934         // non-matching
1935         count++;
1936     }
1937     return 0;
1938 }
1939 
1940 
1941 // Search the firmware for something. The desired matching is performed using the supplied 'func' function.
1942 // Continues searching until 'func' returns non-zero - then returns 1
1943 // otherwise returns 0.
1944 // Uses the BufRange structs to speed up searching
1945 // Note: this version searches byte by byte in the firmware dump instead of by words
1946 int fw_search_bytes(firmware *fw, search_bytes_fn func)
1947 {
1948     BufRange *p = fw->br;
1949     while (p)
1950     {
1951         int k;
1952         for (k = p->off*4; k < (p->off + p->len)*4; k++)
1953         {
1954             if (func(fw,k))
1955                 return 1;
1956         }
1957         p = p->next;
1958     }
1959     return 0;
1960 }
1961 
1962 
1963 // ****** firmware loading / initialization / de-allocation ******
1964 // add given address range
1965 void fw_add_adr_range(firmware *fw, uint32_t start, uint32_t end, uint32_t src_start, int type, int flags)
1966 {
1967     if(fw->adr_range_count == FW_MAX_ADR_RANGES) {
1968         fprintf(stderr,"fw_add_adr_range: FW_MAX_ADR_RANGES hit\n");
1969         return;
1970     }
1971     if(src_start < fw->base) {
1972         fprintf(stderr,"fw_add_adr_range: src_start 0x%08x < base 0x%08x\n",src_start,fw->base);
1973         return;
1974     }
1975     if(src_start >= fw->base+fw->size8) {
1976         fprintf(stderr,"fw_add_adr_range: src_start 0x%08x outside dump end 0x%08x\n",src_start,fw->base+fw->size8);
1977         return;
1978     }
1979     if(end <= start) {
1980         fprintf(stderr,"fw_add_adr_range: end 0x%08x <= start 0x%08x\n",end,start);
1981         return;
1982     }
1983     uint32_t len=end-start;
1984     if(len > 0xFFFFFFFF - src_start) {
1985         fprintf(stderr,"fw_add_adr_range: range too long %d\n",len);
1986         return;
1987     }
1988     if(len > fw->size8 - (start - fw->base)) {
1989         fprintf(stderr,"fw_add_adr_range: range outside of dump %d\n",len);
1990         return;
1991     }
1992     adr_range_t *r=&fw->adr_ranges[fw->adr_range_count];
1993     // TODO some firmware copies (i.e. g5x code 2) may end on non-word aligned address even though copy is words
1994     r->start=start;
1995     r->src_start=src_start;
1996     r->bytes=len;
1997     r->type=type;
1998     r->flags=flags;
1999     r->buf=fw->buf8 + (r->src_start - fw->base);
2000 
2001     fw->adr_range_count++;
2002 }
2003 
2004 void find_dryos_vers(firmware *fw)
2005 {
2006     const char *sig="DRYOS version 2.3, release #";
2007     fw->dryos_ver_count = find_bytes_all(fw,sig,strlen(sig),fw->base,fw->dryos_ver_list,FW_MAX_DRYOS_VERS);
2008     /*
2009     int i;
2010     for(i=0;i<fw->dryos_ver_count;i++) {
2011         fprintf(stderr,"found %s (%d) @0x%08x\n",
2012             (char *)adr2ptr(fw,fw->dryos_ver_list[i]),
2013             atoi((char *)adr2ptr(fw,fw->dryos_ver_list[i]+strlen(sig))),
2014             fw->dryos_ver_list[i]);
2015     }
2016     */
2017     if(fw->dryos_ver_count) {
2018         if(fw->dryos_ver_count == FW_MAX_DRYOS_VERS) {
2019             fprintf(stderr,"WARNING hit FW_MAX_DRYOS_VERS\n");
2020         }
2021         uint32_t i;
2022         int match_i;
2023         uint32_t min_adr = 0xFFFFFFFF;
2024 
2025         // ref should easily be in the first 8M (most near start but g7x2 at >0x500000)
2026         uint32_t maxadr = (fw->rom_code_search_max_adr - 0x800000 > fw->base)?fw->base + 0x800000:fw->rom_code_search_max_adr;
2027         // look for pointer to dryos version nearest to main ROM start, before the string itself
2028         // NOTE it's the *pointer* that must be nearest, the string may not be the first
2029         for(i=0; i<fw->dryos_ver_count; i++) {
2030             // TODO could limit range more, ctypes should be ref'd a lot
2031             // could sanity check not a random value that happens to match
2032             uint32_t adr = find_u32_adr_range(fw,fw->dryos_ver_list[i],fw->rom_code_search_min_adr,maxadr);
2033             if(adr && adr < min_adr) {
2034                 min_adr = adr;
2035                 match_i = i;
2036             }
2037         }
2038         if(min_adr == 0xFFFFFFFF) {
2039             fprintf(stderr,"WARNING dryos version pointer not found, defaulting to first\n");
2040             match_i = 0;
2041             min_adr = 0;
2042         }
2043         fw->dryos_ver_str = (char *)adr2ptr(fw,fw->dryos_ver_list[match_i]);
2044         fw->dryos_ver = atoi((char *)adr2ptr(fw,fw->dryos_ver_list[match_i]+strlen(sig)));
2045         fw->dryos_ver_adr = fw->dryos_ver_list[match_i];
2046         fw->dryos_ver_ref_adr = min_adr;
2047         // fprintf(stderr,"main firmware version %s @ 0x%08x ptr 0x%08x\n",fw->dryos_ver_str,fw->dryos_ver_adr,min_adr);
2048     } else {
2049         fw->dryos_ver=0;
2050         fw->dryos_ver_str=NULL;
2051         fw->dryos_ver_adr=0;
2052     }
2053 }
2054 
2055 // load firmware and initialize stuff that doesn't require disassembly
2056 void firmware_load(firmware *fw, const char *filename, uint32_t base_adr,int fw_arch)
2057 {
2058     FILE *f = fopen(filename, "rb");
2059     if (f == NULL)
2060     {
2061         fprintf(stderr,"Error opening %s\n",filename);
2062         exit(1);
2063     }
2064     fseek(f,0,SEEK_END);
2065     fw->size8 = ftell(f);
2066     fseek(f,0,SEEK_SET);
2067     // dumps should be an integral number of 32 bit words
2068     // ensures accessing as 32 bit ints safe
2069     if(fw->size8&3) {
2070         fprintf(stderr,"WARNING: dump size %d is not divisible by 4, truncating\n",fw->size8);
2071         fw->size8 &= ~3;
2072     }
2073 
2074     // adjust to ensure base_adr + size doesn't overflow
2075     if((int)(0xFFFFFFFF - base_adr) < fw->size8) {
2076         fprintf(stderr,"adjusted dump size 0x%08x->",fw->size8);
2077         fw->size8 = 0xFFFFFFFC - base_adr;
2078         fprintf(stderr,"0x%08x\n",fw->size8);
2079     }
2080 
2081     fw->arch=fw_arch;
2082     fw->size32=fw->size8/4;
2083 
2084     fw->base = base_adr;
2085 
2086     fw->buf8 = malloc(fw->size8);
2087     if(!fw->buf8) {
2088         fprintf(stderr,"malloc %d failed\n",fw->size8);
2089         exit(1);
2090     }
2091     fread(fw->buf8, 1, fw->size8, f);
2092     fclose(f);
2093     findRanges(fw);
2094 
2095     fw->adr_range_count=0;
2096     // add ROM
2097     fw_add_adr_range(fw,fw->base, fw->base+fw->size8, fw->base, ADR_RANGE_ROM, ADR_RANGE_FL_NONE);
2098 
2099     fw->main_offs = 0;
2100     int k = find_str(fw, "gaonisoy");
2101     // assume firmware start is 32 bit jump over goanisoy
2102     if(k == -1) {
2103         // suppress warning on vxworks, main firmware start is always offset 0
2104         if(find_str(fw,"VxWorks") == -1) {
2105             fprintf(stderr,"WARNING gaonisoy string not found, assuming code start offset 0\n");
2106         }
2107     } else if (k != 1) {
2108         // check at 0x20004 - note doesn't just use offset of first gaonisoy, because could be ref'd in romstarter
2109         if(fw_memcmp(fw,fw->base+0x20004,"gaonisoy",8) == 0) {
2110             fw->main_offs = 0x20000;
2111         } else if (fw_memcmp(fw,fw->base+0x10004,"gaonisoy",8) == 0) { // newer armv5 firmwares base ff81000 start at ff820000
2112             fw->main_offs = 0x10000;
2113         } else {
2114             fprintf(stderr,"WARNING code start offset not found, assuming 0\n");
2115         }
2116     }
2117 
2118     fw->rom_code_search_min_adr = fw->base + fw->main_offs; // 0 if not found
2119     fw->rom_code_search_max_adr=fw->base+fw->size8 - 4; // default == end of fw, may be adjusted by firmware_init_data_ranges
2120 
2121     find_dryos_vers(fw);
2122 
2123     fw->firmware_ver_str = 0;
2124     k = find_str(fw, "Firmware Ver ");
2125     if (k != -1)
2126     {
2127         fw->firmware_ver_str = (char *)fw->buf8 + k*4;
2128     }
2129     // set expected instruction set
2130     if(fw->arch==FW_ARCH_ARMv5) {
2131         fw->thumb_default = 0;
2132     } else if(fw->arch==FW_ARCH_ARMv7) {
2133         fw->thumb_default = 1;
2134     } else {
2135         fprintf(stderr,"firmware_init_capstone: invalid arch\n");
2136     }
2137 }
2138 
2139 // test to verify thumb blx bug is patched in linked capstone
2140 int do_blx_check(firmware *fw)
2141 {
2142 /*
2143 test code blxbork.S
2144 .syntax unified
2145 .globl arm_code
2146 .globl _start
2147 _start:
2148 .code 16
2149 blx arm_code
2150 movs r0, #1
2151 blx arm_code
2152 .align 4
2153 .code 32
2154 arm_code:
2155 bx lr
2156 
2157 arm-none-eabi-gcc -nostdlib blxbork.S -o blxbork.elf
2158 */
2159 
2160 static const uint8_t code[]=
2161     "\x00\xf0\x06\xe8" // blx arm_code (start + 0x10)
2162     "\x01\x20" // movs r0,#1, to cause non-word align
2163     "\x00\xf0\x04\xe8" // blx arm_code
2164 ;
2165     cs_insn *insn;
2166     size_t count;
2167     count = cs_disasm(fw->cs_handle_thumb, code, sizeof(code), 0xFF000000, 3, &insn);
2168 
2169     if(!(count == 3 && insn[0].id == ARM_INS_BLX && insn[2].id == ARM_INS_BLX)) {
2170         fprintf(stderr,"do_blx_check: disassembly failed\n");
2171         return 0;
2172     }
2173 
2174     int r=(insn[0].detail->arm.operands[0].imm == insn[2].detail->arm.operands[0].imm);
2175 
2176 
2177     if(!r) {
2178         fprintf(stderr,"WARNING! Incorrect disassembly is likely\n");
2179     }
2180     cs_free(insn,count);
2181     return r;
2182 }
2183 
2184 // initialize capstone state for loaded fw
2185 int firmware_init_capstone(firmware *fw)
2186 {
2187     if (cs_open(CS_ARCH_ARM, CS_MODE_ARM, &fw->cs_handle_arm) != CS_ERR_OK) {
2188         fprintf(stderr,"cs_open ARM failed\n");
2189         return 0;
2190     }
2191     cs_option(fw->cs_handle_arm, CS_OPT_DETAIL, CS_OPT_ON);
2192     if (cs_open(CS_ARCH_ARM, CS_MODE_THUMB, &fw->cs_handle_thumb) != CS_ERR_OK) {
2193         fprintf(stderr,"cs_open thumb failed\n");
2194         return 0;
2195     }
2196     cs_option(fw->cs_handle_thumb, CS_OPT_DETAIL, CS_OPT_ON);
2197     fw->is=disasm_iter_new(fw,0);
2198     do_blx_check(fw);
2199     return 1;
2200 }
2201 
2202 /*
2203 look for
2204 ldr rx, =ROM ADR
2205 ldr ry, =non-rom adr
2206 ldr rz, =non ROM adr > ry
2207 leave is pointing at last LDR, or last checked instruction
2208 */
2209 
2210 int find_startup_copy(firmware *fw,
2211                          iter_state_t *is,
2212                          int max_search,
2213                          uint32_t *src_start,
2214                          uint32_t *dst_start,
2215                          uint32_t *dst_end)
2216 {
2217     int count=0;
2218     uint32_t *fptr = NULL;
2219     uint32_t *dptr = NULL;
2220     uint32_t *eptr = NULL;
2221     *src_start=0;
2222     *dst_start=0;
2223     *dst_end=0;
2224 
2225     while(disasm_iter(fw,is) && count < max_search) {
2226         uint32_t *pv=LDR_PC2valptr(fw,is->insn);
2227         // not an LDR pc, reset
2228         // TODO some firmwares might use other instructions
2229         if(!pv) {
2230             fptr=dptr=eptr=NULL;
2231         }else if(!fptr) {
2232             // only candidate if in ROM
2233             if(*pv > fw->base) {
2234                 fptr=pv;
2235             }
2236         } else if(!dptr) {
2237             if(*pv < fw->base) {
2238                 dptr=pv;
2239             } else {
2240                 fptr=NULL; // dest address in ROM, reset
2241             }
2242         } else if(!eptr) {
2243             if(*pv < fw->base && *pv > *dptr) {
2244                 eptr=pv;
2245             } else { // dest end address in ROM, or before source, reset
2246                     // TODO maybe should swap instead if < source
2247                 fptr=dptr=NULL;
2248             }
2249         }
2250         if(fptr && dptr && eptr) {
2251             *src_start=*fptr;
2252             *dst_start=*dptr;
2253             *dst_end=*eptr;
2254             return 1;
2255         }
2256         count++;
2257     }
2258     return 0;
2259 }
2260 
2261 void find_exception_vec(firmware *fw, iter_state_t *is)
2262 {
2263     // check for exception vector, d7 id
2264     // only on thumb2 for now
2265     if(fw->arch != FW_ARCH_ARMv7) {
2266         return;
2267     }
2268 
2269     const insn_match_t match_bl_mcr[]={
2270         {MATCH_INS(BL,  1), {MATCH_OP_IMM_ANY}},
2271         // Vector Base Address Register MCR p15, 0, <Rt>, c12, c0, 0 - not present on PMSA
2272         {MATCH_INS(MCR, 6), {MATCH_OP_PIMM(15),MATCH_OP_IMM(0),MATCH_OP_REG_ANY,MATCH_OP_CIMM(12),MATCH_OP_CIMM(0),MATCH_OP_IMM(0)}},
2273         {ARM_INS_ENDING}
2274     };
2275 
2276     // reset to main fw start
2277     disasm_iter_init(fw, is, fw->base + fw->main_offs + 12 + fw->thumb_default);
2278     if(!insn_match_find_next(fw,is,4,match_bl_mcr)) {
2279         printf("no match!\n");
2280         return;
2281     }
2282     // check which instruction we matched
2283     uint32_t faddr = get_branch_call_insn_target(fw,is);
2284     if(faddr) {
2285         // bl = digic6, has function to set up exception vector
2286         disasm_iter_init(fw, is, faddr);
2287         disasm_iter(fw, is);
2288         int ra,rb;
2289         uint32_t va, vb;
2290         if(!IS_INSN_ID_MOVx(is->insn->id) || is->insn->detail->arm.operands[1].type != ARM_OP_IMM) {
2291             return;
2292         }
2293         ra = is->insn->detail->arm.operands[0].reg;
2294         va = is->insn->detail->arm.operands[1].imm;
2295         disasm_iter(fw, is);
2296         if(is->insn->id != ARM_INS_MOVT
2297             || is->insn->detail->arm.operands[0].reg != ra
2298             || is->insn->detail->arm.operands[1].type != ARM_OP_IMM) {
2299             return;
2300         }
2301         va = (is->insn->detail->arm.operands[1].imm << 16) | (va & 0xFFFF);
2302         // fw has BIC
2303         va = va & ~1;
2304         if(adr_get_range_type(fw,va) != ADR_RANGE_ROM) {
2305             return;
2306         }
2307         disasm_iter(fw, is);
2308         if(!IS_INSN_ID_MOVx(is->insn->id) || is->insn->detail->arm.operands[1].type != ARM_OP_IMM) {
2309             return;
2310         }
2311         rb = is->insn->detail->arm.operands[0].reg;
2312         vb = is->insn->detail->arm.operands[1].imm;
2313         disasm_iter(fw, is);
2314         if(is->insn->id != ARM_INS_MOVT
2315             || is->insn->detail->arm.operands[0].reg != rb
2316             || is->insn->detail->arm.operands[1].type != ARM_OP_IMM) {
2317             return;
2318         }
2319         vb = (is->insn->detail->arm.operands[1].imm << 16) | (vb & 0xFFFF);
2320         vb = vb & ~1;
2321         if(adr_get_range_type(fw,vb) != ADR_RANGE_ROM) {
2322             return;
2323         }
2324         if(va >= vb) {
2325             return;
2326         }
2327         fw_add_adr_range(fw,0,vb - va, va, ADR_RANGE_RAM_CODE, ADR_RANGE_FL_EVEC | ADR_RANGE_FL_TCM);
2328         // printf("ex vec 0x%08x-0x%08x\n",va,vb);
2329 
2330     } else if(is->insn->id == ARM_INS_MCR) {
2331         // digic 7 = mcr ...
2332         fw->arch_flags |= FW_ARCH_FL_VMSA;
2333         // rewind 1
2334         disasm_iter_init(fw, is, adr_hist_get(&is->ah,1));
2335         disasm_iter(fw, is);
2336         // uint32_t ex_vec = LDR_PC2val(fw,is->insn);
2337         //printf("found MCR @ 0x%"PRIx64" ex vec at 0x%08x\n",is->insn->address,ex_vec);
2338     }
2339 }
2340 
2341 // init basic copied RAM code / data ranges
2342 void firmware_init_data_ranges(firmware *fw)
2343 {
2344 //TODO maybe should return status
2345     uint32_t src_start, dst_start, dst_end;
2346     uint32_t data_found_copy = 0;
2347 
2348     // start at fw start  + 12 (32 bit jump, gaonisoy)
2349     iter_state_t *is=disasm_iter_new(fw, fw->base + fw->main_offs + 12 + fw->thumb_default);
2350 
2351     fw->data_init_start=0;
2352     fw->data_start=0;
2353     fw->data_len=0;
2354 
2355     fw->memisostart=0;
2356 
2357     int base2_found=0;
2358     int base3_found=0;
2359 
2360     // TODO  pre-d6 ROMs have a lot more stuff before first copy
2361     int max_search=100;
2362     while(find_startup_copy(fw,is,max_search,&src_start,&dst_start,&dst_end)) {
2363         // all known copied code is 3f1000 or higher, guess data
2364         if(dst_start < 0x100000) {
2365             // fprintf(stderr, "data?  @0x%"PRIx64" 0x%08x-0x%08x from 0x%08x\n",is->adr,dst_start,dst_end,src_start);
2366             if(fw->data_init_start) {
2367                 fprintf(stderr,"firmware_init_data_ranges: data already found, unexpected start 0x%08x src 0x%08x end 0x%08x\n",
2368                         dst_start,src_start,dst_end);
2369                 continue;
2370             }
2371 
2372             // not a known value, warn
2373             if(dst_start != 0x1900 && dst_start != 0x8000) {
2374                 fprintf(stderr,"firmware_init_data_ranges: guess unknown ROM data_start 0x%08x src 0x%08x end 0x%08x\n",
2375                         dst_start,src_start,dst_end);
2376             }
2377             fw->data_init_start=src_start;
2378             fw->data_start=dst_start;
2379             fw->data_len=dst_end-dst_start;
2380             fw_add_adr_range(fw,dst_start,dst_end,src_start, ADR_RANGE_INIT_DATA, ADR_RANGE_FL_NONE);
2381             data_found_copy=is->adr;
2382         } else if(dst_start < 0x08000000) { /// highest known first copied ram code 0x01900000
2383             // fprintf(stderr,"code1? @0x%"PRIx64" 0x%08x-0x%08x from 0x%08x\n",is->adr,dst_start,dst_end,src_start);
2384             if(base2_found) {
2385                 fprintf(stderr,"firmware_init_data_ranges: base2 already found, unexpected start 0x%08x src 0x%08x end 0x%08x\n",
2386                         dst_start,src_start,dst_end);
2387                 continue;
2388             }
2389             base2_found=1;
2390             // known values
2391             if( dst_start != 0x003f1000 &&
2392                 dst_start != 0x00431000 &&
2393                 dst_start != 0x00471000 &&
2394                 dst_start != 0x00685000 &&
2395                 dst_start != 0x00671000 &&
2396                 dst_start != 0x006b1000 &&
2397                 dst_start != 0x010c1000 &&
2398                 dst_start != 0x010e1000 &&
2399                 dst_start != 0x01900000) {
2400                 fprintf(stderr,"firmware_init_data_ranges: guess unknown base2 0x%08x src 0x%08x end 0x%08x\n",
2401                         dst_start,src_start,dst_end);
2402             }
2403             fw_add_adr_range(fw,dst_start,dst_end,src_start,ADR_RANGE_RAM_CODE, ADR_RANGE_FL_NONE);
2404         } else { // know < ROM based on match, assume second copied code
2405             // fprintf(stderr, "code2? @0x%"PRIx64" 0x%08x-0x%08x from 0x%08x\n",is->adr,dst_start,dst_end,src_start);
2406             if(base3_found) {
2407                 fprintf(stderr,"firmware_init_data_ranges: base3 already found, unexpected start 0x%08x src 0x%08x end 0x%08x\n",
2408                         dst_start,src_start,dst_end);
2409                 continue;
2410             }
2411             base3_found=1;
2412             if(dst_start != 0xbfe10800 && // known digic 6 value (g5x)
2413                dst_start != 0xdffc4900) { // known digic 7 value (m5)
2414                 fprintf(stderr,"firmware_init_data_ranges: guess unknown base3 0x%08x src 0x%08x end 0x%08x\n",
2415                         dst_start,src_start,dst_end);
2416             }
2417             fw_add_adr_range(fw,dst_start,dst_end,src_start,ADR_RANGE_RAM_CODE, ADR_RANGE_FL_TCM);
2418         }
2419         if(fw->data_start && base2_found && base3_found) {
2420             break;
2421         }
2422         // after first, shorter search range in between copies
2423         max_search=16;
2424     }
2425 
2426     // look for BSS init after last found copy
2427     if(data_found_copy) {
2428         int count=0;
2429         uint32_t *eptr=NULL;
2430         uint32_t *dptr=NULL;
2431         disasm_iter_init(fw,is,(data_found_copy-4) | fw->thumb_default);
2432         while(disasm_iter(fw,is) && count < 20) {
2433             uint32_t *pv=LDR_PC2valptr(fw,is->insn);
2434             // not an LDR pc, reset;
2435             if(!pv) {
2436                 //dptr=eptr=NULL;
2437             } else if(!dptr) {
2438                 // TODO older firmwares use reg with ending value from DATA copy
2439                 // should be equal to end pointer of data
2440                 if(*pv == fw->data_start + fw->data_len) {
2441                     dptr=pv;
2442                 }
2443             } else if(!eptr) {
2444                 if(*pv < fw->base) {
2445                     if(*pv != fw->data_start + fw->data_len) {
2446                         eptr=pv;
2447                     }
2448                 } else { // dest end address in ROM, reset
2449                     eptr=dptr=NULL;
2450                 }
2451             }
2452             if(dptr && eptr) {
2453                 // fprintf(stderr, "bss?   @0x%"PRIx64" 0x%08x-0x%08x\n",is->adr,*dptr,*eptr);
2454                 fw->memisostart=*eptr;
2455                 break;
2456             }
2457             count++;
2458         }
2459     }
2460 
2461     find_exception_vec(fw,is);
2462 
2463     // if data found, adjust default code search range
2464     // TODO could use copied code regions too, but after data on known firmwares
2465     if(fw->data_start) {
2466         fw->rom_code_search_max_adr=fw->data_init_start;
2467     }
2468     // if dryos version string found, use as search limit
2469     if(fw->dryos_ver_adr) {
2470         if(fw->dryos_ver_adr < fw->rom_code_search_max_adr) {
2471             fw->rom_code_search_max_adr = fw->dryos_ver_adr;
2472         }
2473     }
2474     disasm_iter_free(is);
2475 }
2476 
2477 // free resources associated with fw
2478 void firmware_unload(firmware *fw)
2479 {
2480     if(!fw) {
2481         return;
2482     }
2483     if(fw->is) {
2484         disasm_iter_free(fw->is);
2485     }
2486     if(fw->cs_handle_arm) {
2487         cs_close(&fw->cs_handle_arm);
2488     }
2489     if(fw->cs_handle_thumb) {
2490         cs_close(&fw->cs_handle_thumb);
2491     }
2492     free(fw->buf8);
2493     memset(fw,0,sizeof(firmware));
2494 }

/* [<][>][^][v][top][bottom][index][help] */