ROSE 2.1.0
Loading...
Searching...
No Matches
Partitioner2/Engine.h
1#ifndef ROSE_BinaryAnalysis_Partitioner2_Engine_H
2#define ROSE_BinaryAnalysis_Partitioner2_Engine_H
3#include <featureTests.h>
4#ifdef ROSE_ENABLE_BINARY_ANALYSIS
5
6#include <Rose/BasicTypes.h>
7#include <Rose/BinaryAnalysis/Architecture/BasicTypes.h>
8#include <Rose/BinaryAnalysis/Partitioner2/Exception.h>
9#include <Rose/BinaryAnalysis/Partitioner2/Modules.h>
10
11#include <Sawyer/DistinctList.h>
12#include <Sawyer/SharedObject.h>
13#include <Sawyer/SharedPointer.h>
14
15namespace Rose {
16namespace BinaryAnalysis {
17namespace Partitioner2 {
18
157 // Internal data structures
159public:
161 using Ptr = EnginePtr;
162
163 //--------------------------------------------------------------------------------------------------------------------------
164public:
168 struct Settings { // Settings object held by an engine (see the settings_ data member of the enclosing class).
172 IndirectControlFlow::Settings icf; // Settings for indirect control flow recovery.
177#ifdef ROSE_ENABLE_BOOST_SERIALIZATION
178 private:
179 friend class boost::serialization::access; // grants the Boost serialization access class use of the private serialize()
180 template<class S> void serialize(S&, unsigned version); // declared only when ROSE_ENABLE_BOOST_SERIALIZATION is defined
181#endif
182
183 public:
184 Settings(); // declared here; the definition is not part of this header listing
185 ~Settings(); // declared here; the definition is not part of this header listing
186 };
187
188 //--------------------------------------------------------------------------------------------------------------------------
189public:
192 public:
193 // WARNING: Defined in Engine.C with different behavior
194 // ~Exception() throw();
195 ~Exception();
196
198 explicit Exception(const std::string&);
199 };
200
201 //--------------------------------------------------------------------------------------------------------------------------
202public:
205 public:
208
215 virtual std::vector<std::string> specimen(const std::vector<std::string>&) const = 0;
216 };
217
220 public:
221 virtual std::vector<std::string> specimen(const std::vector<std::string>&) const override;
222 };
223
226 size_t n_;
227 public:
229 explicit FirstPositionalArguments(size_t n);
230 virtual std::vector<std::string> specimen(const std::vector<std::string>&) const override;
231 };
232
235 size_t n_;
236 public:
238 explicit AllButLastArguments(size_t n);
239 virtual std::vector<std::string> specimen(const std::vector<std::string>&) const override;
240 };
241
244 size_t n_ = 0;
245 public:
250 virtual std::vector<std::string> specimen(const std::vector<std::string>&) const override;
251 };
252
253 //--------------------------------------------------------------------------------------------------------------------------
254protected:
255 // Engine callback for handling instructions added to basic blocks. This is called when a basic block is discovered,
256 // before it's attached to a partitioner, so it shouldn't really be modifying any state in the engine, but rather only
257 // preparing the basic block to be processed.
259 typedef Sawyer::Container::Map<Address /*target*/, std::vector<Address> /*sources*/> WorkList;
260 public:
262 protected:
264 public:
265 static Ptr instance();
266 virtual bool operator()(bool chain, const Args &args) override;
267 private:
268 void fixFunctionReturnEdge(const Args&);
269 void fixFunctionCallEdges(const Args&);
270 void addPossibleIndeterminateEdge(const Args&);
271 };
272
273 //--------------------------------------------------------------------------------------------------------------------------
274private:
275 // Basic blocks that need to be worked on next. These lists are adjusted whenever a new basic block (or placeholder) is
276 // inserted into or erased from the CFG.
277 class BasicBlockWorkList: public CfgAdjustmentCallback {
278 // The following lists are used for adding outgoing E_CALL_RETURN edges to basic blocks based on whether the basic
279 // block is a call to a function that might return. When a new basic block is inserted into the CFG (or a previous
280 // block is removed, modified, and re-inserted), the operator() is called and conditionally inserts the block into the
281 // "pendingCallReturn" list (if the block is a function call that lacks an E_CALL_RETURN edge and the function is known
282 // to return or the analysis was incomplete).
283 //
284 // When we run out of other ways to create basic blocks, we process the pendingCallReturn list from back to front. If
285 // the back block (which gets popped) has a positive may-return result then an E_CALL_RETURN edge is added to the CFG
286 // and the normal recursive BB discovery is resumed. Otherwise if the analysis is incomplete the basic block is moved
287 // to the processedCallReturn list. The entire pendingCallReturn list is processed before proceeding.
288 //
289 // If there is no more pendingCallReturn work to be done, then the processedCallReturn blocks are moved to the
290 // finalCallReturn list and finalCallReturn is sorted by approximate CFG height (i.e., leaves first). The contents
291 // of the finalCallReturn list are then analyzed and the result (or the default may-return value for failed analyses)
292 // is used to decide whether a new CFG edge should be created, possibly adding new basic block addresses to the
293 // list of undiscovered blocks.
294 //
295 Sawyer::Container::DistinctList<Address> pendingCallReturn_; // blocks that might need an E_CALL_RETURN edge
296 Sawyer::Container::DistinctList<Address> processedCallReturn_; // call sites whose may-return was indeterminate
297 Sawyer::Container::DistinctList<Address> finalCallReturn_; // indeterminate call sites awaiting final analysis
298
299 Sawyer::Container::DistinctList<Address> undiscovered_; // undiscovered basic block list (last-in-first-out)
300 EnginePtr engine_; // engine to which this callback belongs
301 size_t maxSorts_; // max sorts before using unsorted lists
302 public:
303 ~BasicBlockWorkList();
304 protected:
305 BasicBlockWorkList(const EnginePtr &engine, size_t maxSorts); // protected constructor; presumably stores engine and maxSorts in engine_ and maxSorts_ -- confirm in Engine.C
306 public:
308 static Ptr instance(const EnginePtr &engine, size_t maxSorts); // NOTE(review): looks like the public allocating constructor paired with the protected one above -- confirm
309 virtual bool operator()(bool chain, const AttachedBasicBlock &args) override; // CFG callback for an inserted block; per the comment above, may add the block to pendingCallReturn
310 virtual bool operator()(bool chain, const DetachedBasicBlock &args) override; // CFG callback for an erased block
311 Sawyer::Container::DistinctList<Address>& pendingCallReturn(); // mutable reference; presumably to pendingCallReturn_ -- confirm
312 Sawyer::Container::DistinctList<Address>& processedCallReturn(); // mutable reference; presumably to processedCallReturn_ -- confirm
315 void moveAndSortCallReturn(const PartitionerConstPtr&); // presumably the processedCallReturn -> finalCallReturn move-and-sort step described above -- confirm
316 };
317
318 //--------------------------------------------------------------------------------------------------------------------------
319protected:
320 // A work list providing constants from instructions that are part of the CFG.
322 public:
324
325 private:
326 std::set<Address> toBeExamined_; // instructions waiting to be examined
327 std::set<Address> wasExamined_; // instructions we've already examined
328 Address inProgress_; // instruction that is currently in progress
329 std::vector<Address> constants_; // constants for the instruction in progress
330
331 public:
333 protected:
335
336 public:
337 static Ptr instance();
338
339 // Address of instruction being examined.
340 Address inProgress();
341
342 // Possibly insert more instructions into the work list when a basic block is added to the CFG
343 virtual bool operator()(bool chain, const AttachedBasicBlock &attached) override;
344
345 // Possibly remove instructions from the worklist when a basic block is removed from the CFG
346 virtual bool operator()(bool chain, const DetachedBasicBlock &detached) override;
347
348 // Return the next available constant if any.
349 Sawyer::Optional<Address> nextConstant(const PartitionerConstPtr &partitioner);
350 };
351
353 // Data members
355private:
356 std::string name_; // factory name
357 Settings settings_; // Settings for the partitioner.
358 SgAsmInterpretation *interp_; // interpretation set by loadSpecimen
359 Architecture::BaseConstPtr architecture_; // architecture-specific information
360 MemoryMapPtr map_; // memory map initialized by load()
361 BasicBlockWorkList::Ptr basicBlockWorkList_; // what blocks to work on next
362 CodeConstants::Ptr codeFunctionPointers_; // generates constants that are found in instruction ASTs
363 ProgressPtr progress_; // optional progress reporting
364 std::vector<std::string> specimen_; // list of additional command line arguments (often file names)
365
367 // Construction and destruction
369public:
370 virtual ~Engine();
371
372protected:
374 Engine() = delete;
375 Engine(const Engine&) = delete;
376 Engine& operator=(const Engine&) = delete;
377
378protected:
380 Engine(const std::string &name, const Settings &settings);
381
382public:
383 // [Robb Matzke 2023-03-03]: deprecated.
384 // This used to create a binary engine, so we leave it in place for a while for improved backward compatibility
385 static EngineBinaryPtr instance() ROSE_DEPRECATED("use Engine::forge or EngineBinary::instance");
386
387private:
388 void init();
389
391 // Command-line processing
393public:
394
411 virtual std::list<Sawyer::CommandLine::SwitchGroup> commandLineSwitches(); // Command-line switches for a particular engine.
412
418 std::list<Sawyer::CommandLine::SwitchGroup> allCommandLineSwitches(); // List of command-line switches for all engines.
419
431 virtual std::pair<std::string/*title*/, std::string /*doc*/> specimenNameDocumentation() = 0; // Documentation about how the specimen is specified.
432
439 static std::list<std::pair<std::string /*title*/, std::string /*doc*/>> allSpecimenNameDocumentation(); // Documentation for all specimen specifications.
440
448 virtual void addToParser(Sawyer::CommandLine::Parser&); // Add switches and sections to command-line parser.
449
455 void addAllToParser(Sawyer::CommandLine::Parser&); // Add switches and sections to command-line parser (presumably for all engines -- confirm).
456
469 virtual Sawyer::CommandLine::Parser commandLineParser(const std::string &purpose, const std::string &description); // Creates a command-line parser.
470
472 // Factories
474public:
483 static void registerFactory(const EnginePtr &factory); // Register an engine as a factory.
484
491 static bool deregisterFactory(const EnginePtr &factory); // Remove a concrete engine factory from the registry.
492
499 static std::vector<EnginePtr> registeredFactories(); // List of all registered factories.
500
526 //---------------------------------------------------------
527 // These operate on specimens
528 //---------------------------------------------------------
529
530 static EnginePtr forge(const std::vector<std::string> &specimen);
531 static EnginePtr forge(const std::string &specimen);
532
533 //---------------------------------------------------------
534 // These operate on arguments as std::vector<std::string>
535 //---------------------------------------------------------
536
537 // all args
538 static EnginePtr forge(const std::vector<std::string> &arguments, Sawyer::CommandLine::Parser&,
539 const PositionalArgumentParser&, const Settings&);
540
541 // default settings: same arguments as the "all args" overload above, minus the trailing Settings parameter
542 static EnginePtr forge(const std::vector<std::string> &arguments, Sawyer::CommandLine::Parser&,
543 const PositionalArgumentParser&);
544
545 // default positional parser
546 static EnginePtr forge(const std::vector<std::string> &arguments, Sawyer::CommandLine::Parser&, const Settings&);
547
548 // default positional parser and settings
549 static EnginePtr forge(const std::vector<std::string> &arguments, Sawyer::CommandLine::Parser&);
550
551 //---------------------------------------------------------
552 // These operate on arguments as argc and argv
553 //---------------------------------------------------------
554
555 // all args
556 static EnginePtr forge(int argc, char *argv[], Sawyer::CommandLine::Parser&, const PositionalArgumentParser&, const Settings&);
557
558 // default settings
559 static EnginePtr forge(int argc, char *argv[], Sawyer::CommandLine::Parser&, const PositionalArgumentParser&);
560
561 // default positional parser
562 static EnginePtr forge(int argc, char *argv[], Sawyer::CommandLine::Parser&, const Settings&);
563
564 // default positional parser and settings
565 static EnginePtr forge(int argc, char *argv[], Sawyer::CommandLine::Parser&);
569 virtual bool matchFactory(const Sawyer::CommandLine::ParserResult &result, const std::vector<std::string> &specimen) const = 0;
570
576
582 bool isFactory() const;
583
585 // Top-level, do everything functions
587public:
612 SgAsmBlock* frontend(int argc, char *argv[],
613 const std::string &purpose, const std::string &description);
614 virtual SgAsmBlock* frontend(const std::vector<std::string> &args,
615 const std::string &purpose, const std::string &description) = 0;
619 // Basic top-level steps
621public:
627 virtual void reset();
628
655 Sawyer::CommandLine::ParserResult parseCommandLine(int argc, char *argv[],
656 const std::string &purpose, const std::string &description) /*final*/;
657 virtual Sawyer::CommandLine::ParserResult parseCommandLine(const std::vector<std::string> &args,
658 const std::string &purpose, const std::string &description);
675 virtual SgAsmBlock* buildAst(const std::vector<std::string> &fileNames = std::vector<std::string>()) = 0;
676 SgAsmBlock *buildAst(const std::string &fileName) /*final*/;
679 // [Robb Matzke 2023-03-03]: deprecated
680 // Save a partitioner and AST to a file.
681 //
682 // The specified partitioner and the binary analysis components of the AST are saved into the specified file, which is
683 // created if it doesn't exist and truncated if it does exist. The name should end with a ".rba" extension. The file can
684 // be loaded by passing its name to the @ref partition function or by calling @ref loadPartitioner.
685 virtual void savePartitioner(const PartitionerConstPtr&, const boost::filesystem::path&, Serialization::Format = Serialization::BINARY)
686 ROSE_DEPRECATED("use Partitioner::saveAsRbaFile");
687
688 // [Robb Matzke 2023-03-03]: deprecated
689 // Load a partitioner and an AST from a file.
690 //
691 // The specified RBA file is opened and read to create a new @ref Partitioner object and associated AST. The @ref
692 // partition function also understands how to open RBA files.
693 virtual PartitionerPtr loadPartitioner(const boost::filesystem::path&, Serialization::Format = Serialization::BINARY)
694 ROSE_DEPRECATED("use Partitioner::instanceFromRbaFile");
695
697 // Command-line parsing
699public:
718 virtual SgAsmInterpretation* parseContainers(const std::vector<std::string> &fileNames) = 0;
719 SgAsmInterpretation* parseContainers(const std::string &fileName) /*final*/;
742 virtual MemoryMapPtr loadSpecimens(const std::vector<std::string> &fileNames = std::vector<std::string>()) = 0;
743 MemoryMapPtr loadSpecimens(const std::string &fileName) /*final*/;
765 virtual PartitionerPtr partition(const std::vector<std::string> &fileNames = std::vector<std::string>()) = 0;
766 PartitionerPtr partition(const std::string &fileName) /*final*/;
775 virtual void checkSettings();
776
778 // Container parsing
779 //
780 // top-level: parseContainers
782public:
787 virtual bool isRbaFile(const std::string&); // Determine whether a specimen is an RBA file.
788
793 virtual bool isNonContainer(const std::string&) = 0; // Determine whether a specimen name is a non-container.
794
800 virtual bool areContainersParsed() const = 0; // Returns true if containers are parsed.
801
803 // Load specimens
804 //
805 // top-level: loadSpecimens
807public:
811 virtual bool areSpecimensLoaded() const; // Returns true if specimens are loaded.
812
816 virtual void adjustMemoryMap(); // Adjust memory map post-loading.
817
825 MemoryMapPtr memoryMap() const /*final*/; // Property: memory map.
826 virtual void memoryMap(const MemoryMapPtr&); // presumably the setter for the memory map property -- confirm
830 // Architecture
832public:
851 virtual Architecture::BaseConstPtr obtainArchitecture();
852 virtual Architecture::BaseConstPtr obtainArchitecture(const Architecture::BaseConstPtr &hint);
856 // Partitioner high-level functions
857 //
858 // top-level: partition
860public:
863
870
875
879 virtual void runPartitionerInit(const PartitionerPtr&) = 0; // Finds interesting things to work on initially.
880
884 virtual void runPartitionerRecursive(const PartitionerPtr&) = 0; // Runs the recursive part of partitioning.
885
890 virtual void runPartitionerFinal(const PartitionerPtr&) = 0; // Runs the final parts of partitioning.
891
897 virtual void runPartitioner(const PartitionerPtr&); // Partitions instructions into basic blocks and functions.
898
900 // Partitioner mid-level functions
901 //
902 // These are the functions called by the partitioner high-level stuff. These are sometimes overridden in subclasses,
903 // although it is more likely that the high-level stuff is overridden.
905public:
910 virtual void labelAddresses(const PartitionerPtr&, const Configuration&);
911
916
921
928
930 // Partitioner low-level functions
931 //
932 // These are functions that a subclass seldom overrides, and maybe even shouldn't override because of their complexity or
933 // the way they interact with one another.
935public:
936
937
939 // Build AST
941public:
942 // Used internally by ROSE's ::frontend disassemble instructions to build the AST that goes under each SgAsmInterpretation.
943 static void disassembleForRoseFrontend(SgAsmInterpretation*);
944
946 // Settings and properties
948public:
954 const std::string& name() const /*final*/;
955 void name(const std::string&);
962 Architecture::BaseConstPtr architecture();
963
970 const Settings& settings() const /*final*/;
971 Settings& settings() /*final*/;
972 void settings(const Settings&) /*final*/;
980 BasicBlockWorkList::Ptr basicBlockWorkList() const /*final*/;
981 void basicBlockWorkList(const BasicBlockWorkList::Ptr&) /*final*/;
982
989 void codeFunctionPointers(const CodeConstants::Ptr&) /*final*/;
1008 ProgressPtr progress() const /*final*/;
1009 virtual void progress(const ProgressPtr&);
1017 const std::vector<std::string>& specimen() const /*final*/;
1018 virtual void specimen(const std::vector<std::string>&);
1022 // Internal stuff
1024protected:
1025 // Similar to ::frontend but a lot less complicated.
1026 virtual SgProject* roseFrontendReplacement(const std::vector<boost::filesystem::path> &fileNames) = 0;
1027};
1028
1029} // namespace
1030} // namespace
1031} // namespace
1032
1033#endif
1034#endif
Base class for adjusting basic blocks during discovery.
Definition Modules.h:39
Engine for specimens containing machine instructions.
AllButLastArguments(size_t n)
Constructor returning all but last n arguments.
virtual std::vector< std::string > specimen(const std::vector< std::string > &) const override
Return specimen from positional arguments.
virtual std::vector< std::string > specimen(const std::vector< std::string > &) const override
Return specimen from positional arguments.
virtual bool operator()(bool chain, const Args &args) override
Callback method.
virtual bool operator()(bool chain, const DetachedBasicBlock &detached) override
Called when basic block is detached or placeholder erased.
virtual bool operator()(bool chain, const AttachedBasicBlock &attached) override
Called when basic block is attached or placeholder inserted.
Exception(const std::string &)
Construct an exception with a message string.
FirstPositionalArguments(size_t n)
Constructor returning up to n arguments.
virtual std::vector< std::string > specimen(const std::vector< std::string > &) const override
Return specimen from positional arguments.
GroupedPositionalArguments(size_t)
Constructor returning nth group of arguments.
virtual std::vector< std::string > specimen(const std::vector< std::string > &) const override
Return specimen from positional arguments.
GroupedPositionalArguments()
Constructor returning first group of arguments.
virtual std::vector< std::string > specimen(const std::vector< std::string > &) const =0
Return specimen from positional arguments.
Base class for engines driving the partitioner.
virtual PartitionerPtr createBarePartitioner()
Create a bare partitioner.
virtual Sawyer::CommandLine::Parser commandLineParser(const std::string &purpose, const std::string &description)
Creates a command-line parser.
virtual bool matchFactory(const Sawyer::CommandLine::ParserResult &result, const std::vector< std::string > &specimen) const =0
Predicate for matching a concrete engine factory by parser result and specimen.
void addAllToParser(Sawyer::CommandLine::Parser &)
Add switches and sections to command-line parser.
virtual bool isRbaFile(const std::string &)
Determine whether a specimen is an RBA file.
MemoryMapPtr memoryMap() const
Property: memory map.
virtual void runPartitionerRecursive(const PartitionerPtr &)=0
Runs the recursive part of partitioning.
virtual void addToParser(Sawyer::CommandLine::Parser &)
Add switches and sections to command-line parser.
virtual std::pair< std::string, std::string > specimenNameDocumentation()=0
Documentation about how the specimen is specified.
virtual std::vector< DataBlockPtr > makeConfiguredDataBlocks(const PartitionerPtr &, const Configuration &)
Make data blocks based on configuration.
static std::list< std::pair< std::string, std::string > > allSpecimenNameDocumentation()
Documentation for all specimen specifications.
Sawyer::CommandLine::ParserResult parseCommandLine(int argc, char *argv[], const std::string &purpose, const std::string &description)
Parse the command-line.
SgAsmInterpretation * interpretation() const
Property: interpretation.
virtual bool areSpecimensLoaded() const
Returns true if specimens are loaded.
const std::vector< std::string > & specimen() const
Property: specimen.
static bool deregisterFactory(const EnginePtr &factory)
Remove a concrete engine factory from the registry.
ProgressPtr progress() const
Property: progress reporting.
virtual void runPartitionerFinal(const PartitionerPtr &)=0
Runs the final parts of partitioning.
virtual bool areContainersParsed() const =0
Returns true if containers are parsed.
SgAsmBlock * frontend(int argc, char *argv[], const std::string &purpose, const std::string &description)
Most basic usage of the partitioner.
virtual EnginePtr instanceFromFactory(const Settings &)=0
Virtual constructor for factories.
virtual std::vector< FunctionPtr > makeConfiguredFunctions(const PartitionerPtr &, const Configuration &)
Make functions based on configuration information.
static void registerFactory(const EnginePtr &factory)
Register an engine as a factory.
virtual void adjustMemoryMap()
Adjust memory map post-loading.
Architecture::BaseConstPtr architecture()
Property: Architecture.
virtual void checkSettings()
Check settings after command-line is processed.
Engine()=delete
Default constructor.
virtual void runPartitionerInit(const PartitionerPtr &)=0
Finds interesting things to work on initially.
virtual PartitionerPtr createPartitioner()=0
Create partitioner.
static EnginePtr forge(const std::vector< std::string > &specimen)
Creates a suitable engine based on the specimen.
std::list< Sawyer::CommandLine::SwitchGroup > allCommandLineSwitches()
List of command-line switches for all engines.
bool isFactory() const
Returns true if this object is a factory.
virtual std::list< Sawyer::CommandLine::SwitchGroup > commandLineSwitches()
Command-line switches for a particular engine.
BasicBlockWorkList::Ptr basicBlockWorkList() const
Property: BasicBlock work list.
Engine(const std::string &name, const Settings &settings)
Allocating instance constructors are implemented by the non-abstract subclasses.
virtual SgAsmBlock * buildAst(const std::vector< std::string > &fileNames=std::vector< std::string >())=0
Obtain an abstract syntax tree.
CodeConstants::Ptr codeFunctionPointers() const
Property: Instruction AST constants.
const Settings & settings() const
Property: All settings.
virtual bool isNonContainer(const std::string &)=0
Determine whether a specimen name is a non-container.
const std::string & name() const
Property: Name.
virtual SgAsmInterpretation * parseContainers(const std::vector< std::string > &fileNames)=0
Parse specimen binary containers.
virtual void reset()
Reset the engine to its initial state.
virtual void labelAddresses(const PartitionerPtr &, const Configuration &)
Label addresses.
virtual MemoryMapPtr loadSpecimens(const std::vector< std::string > &fileNames=std::vector< std::string >())=0
Load and/or link interpretation.
virtual void updateAnalysisResults(const PartitionerPtr &)
Runs various analysis passes.
virtual void checkCreatePartitionerPrerequisites() const
Check that we have everything necessary to create a partitioner.
virtual void runPartitioner(const PartitionerPtr &)
Partitions instructions into basic blocks and functions.
virtual Architecture::BaseConstPtr obtainArchitecture()
Determine the architecture.
static std::vector< EnginePtr > registeredFactories()
List of all registered factories.
virtual PartitionerPtr partition(const std::vector< std::string > &fileNames=std::vector< std::string >())=0
Partition instructions into basic blocks and functions.
Partitions instructions into basic blocks and functions.
A doubly-linked list of distinct items.
Container associating values with keys.
Definition Sawyer/Map.h:72
Holds a value or nothing.
Definition Optional.h:54
Creates SharedPointer from this.
Base class for reference counted objects.
Instruction basic block.
Represents an interpretation of a binary container.
This class represents a source project, with a list of SgFile objects and global information about th...
std::shared_ptr< const Base > BaseConstPtr
Reference counted pointer for Architecture::Base.
Sawyer::SharedPointer< Engine > EnginePtr
Shared-ownership pointer for Engine.
std::uint64_t Address
Address.
Definition Address.h:11
The ROSE library.
Sawyer support library.
EngineSettings engine
Settings that control engine behavior.
LoaderSettings loader
Settings used during specimen loading.
DisassemblerSettings disassembler
Settings for creating the disassembler.
IndirectControlFlow::Settings icf
Settings for indirect control flow recovery.
AstConstructionSettings astConstruction
Settings for constructing the AST.
PartitionerSettings partitioner
Settings for creating a partitioner.
JvmSettings engineJvm
Settings that control behavior specific to EngineJvm.