ROSE 2.1.0
Loading...
Searching...
No Matches
String.h
1#ifndef ROSE_BinaryAnalysis_String_H
2#define ROSE_BinaryAnalysis_String_H
3#include <featureTests.h>
4#ifdef ROSE_ENABLE_BINARY_ANALYSIS
5
6#include <Rose/BinaryAnalysis/MemoryMap.h>
7#include <Rose/Diagnostics.h>
8#include <Rose/Exception.h>
9#include <Sawyer/CommandLine.h>
10#include <Sawyer/Optional.h>
11
12#include <boost/regex.hpp>
13#include <unordered_set>
14
15namespace Rose {
16namespace BinaryAnalysis {
17
174namespace Strings {
175
178
// One byte in a sequence that encodes a code value.
179typedef uint8_t Octet;
// A sequence of octets.
180typedef std::vector<Octet> Octets;
// One value in a sequence that encodes a code point.
181typedef unsigned CodeValue;
// A sequence of code values.
182typedef std::vector<CodeValue> CodeValues;
// One character in a coded character set.
183typedef unsigned CodePoint;
// A sequence of code points, i.e., a string.
184typedef std::vector<CodePoint> CodePoints;
188 public:
189 Exception(const std::string& s) : Rose::Exception(s) {}
190};
191
// Specification for a single filter criterion: a filter type, a pattern string, and a case-sensitivity mode.
227class ROSE_DLL_API FilterSpecification {
228 public:
 // Type of filter: exact match, substring match, or regular expression.
230 enum class Type {
231 EXACT_MATCH,
232 SUBSTRING,
233 REGEX
234 };
235
 // Regex case sensitivity mode.
237 enum class CaseSensitivity {
238 SENSITIVE,
239 INSENSITIVE
240 };
241
242 private:
243 Type type_;
244 std::string pattern_;
245 CaseSensitivity caseSensitivity_;
246
247 public:
 // Constructor for exact match or substring filter. Defined outside this header listing.
256 FilterSpecification(Type type, const std::string& pattern);
257
 // Constructor for regex filter with explicit case sensitivity. Defined outside this header listing.
266 FilterSpecification(const std::string& pattern, CaseSensitivity caseSensitivity);
267
 // Property: Type of filter.
269 Type type() const { return type_; }
270
 // Property: Pattern string. Returned by reference to the stored member.
272 const std::string& pattern() const { return pattern_; }
273
 // Property: Case sensitivity (only meaningful for REGEX type).
275 CaseSensitivity caseSensitivity() const { return caseSensitivity_; }
276
 // Returns true when the stored case-sensitivity mode is SENSITIVE. The type is not checked here.
278 bool isCaseSensitive() const { return caseSensitivity_ == CaseSensitivity::SENSITIVE; }
279};
280
// Filter strings by exact matches, substring matches, and regex matches.
348class ROSE_DLL_API StringFilter {
349 private:
 // Pattern sets, one per filter kind; regex patterns are kept separately per case-sensitivity mode.
350 std::unordered_set<std::string> exactStrings_;
351 std::unordered_set<std::string> substrings_;
352 std::unordered_set<std::string> regexPatternsCaseSensitive_;
353 std::unordered_set<std::string> regexPatternsCaseInsensitive_;
354
 // Declared mutable so that the const member updateRegex() can modify them.
 // NOTE(review): presumably a lazily rebuilt cache of compiled patterns -- confirm in the implementation.
355 mutable Sawyer::Optional<boost::regex> compiledRegexCaseSensitive_;
356 mutable Sawyer::Optional<boost::regex> compiledRegexCaseInsensitive_;
357 mutable bool regexNeedsUpdate_ = true;
358
 // Defined outside this header listing.
359 void updateRegex() const;
360
361 public:
 // NOTE(review): the index for this header also lists addFilter(const FilterSpecification&), "Add a filter
 // specification"; its declaration line does not appear in this listing.
369
 // Test if a string matches any filter.
374 bool match(const std::string& str) const;
375};
376
400
// Returns true for COMPLETED_STATE or FINAL_STATE.
402bool isDone(State st);
403
406
// Defines mapping between code points and code values.
// NOTE(review): the Ptr typedef (shared ownership pointer) and the pure virtual encode(CodePoint) listed in the
// index for this header do not appear in this listing.
413class ROSE_DLL_API CharacterEncodingForm: public Sawyer::SharedObject {
414 protected:
 // Decoder state; starts in INITIAL_STATE.
415 State state_ = INITIAL_STATE;
416
417 public:
419 virtual ~CharacterEncodingForm() {}
420
423
 // Create a new encoder from this one.
425 virtual Ptr clone() const = 0;
426
 // Name of encoder.
428 virtual std::string name() const = 0;
429
435
 // Returns the current decoder state.
437 State state() const { return state_; }
438
 // Decode one code value.
443 virtual State decode(CodeValue) = 0;
444
 // Consume a decoded code point.
448 virtual CodePoint consume() = 0;
449
 // Reset the decoder state machine.
451 virtual void reset() = 0;
452};
453
458 CodePoint cp_;
459
460 protected:
461 NoopCharacterEncodingForm() : cp_(0) {}
462
463 public:
466
467 static Ptr instance() { return Ptr(new NoopCharacterEncodingForm); }
468 virtual CharacterEncodingForm::Ptr clone() const override { return Ptr(new NoopCharacterEncodingForm(*this)); }
469 virtual std::string name() const override { return "no-op"; }
470 virtual CodeValues encode(CodePoint cp) override;
471 virtual State decode(CodeValue) override;
472 virtual CodePoint consume() override;
473 virtual void reset() override;
474};
475
478
483 CodePoint cp_;
484
485 protected:
486 Utf8CharacterEncodingForm() : cp_(0) {}
487
488 public:
491
492 static Ptr instance() { return Ptr(new Utf8CharacterEncodingForm); }
493 virtual CharacterEncodingForm::Ptr clone() const override { return Ptr(new Utf8CharacterEncodingForm(*this)); }
494 virtual std::string name() const override { return "UTF-8"; }
495 virtual CodeValues encode(CodePoint cp) override;
496 virtual State decode(CodeValue) override;
497 virtual CodePoint consume() override;
498 virtual void reset() override;
499};
500
503
508 CodePoint cp_;
509
510 protected:
511 Utf16CharacterEncodingForm() : cp_(0) {}
512
513 public:
516
517 static Ptr instance() { return Ptr(new Utf16CharacterEncodingForm); }
518 virtual CharacterEncodingForm::Ptr clone() const override { return Ptr(new Utf16CharacterEncodingForm(*this)); }
519 virtual std::string name() const override { return "UTF-16"; }
520 virtual CodeValues encode(CodePoint cp) override;
521 virtual State decode(CodeValue) override;
522 virtual CodePoint consume() override;
523 virtual void reset() override;
524};
525
528
535 protected:
536 State state_ = INITIAL_STATE;
537
538 public:
540 virtual ~CharacterEncodingScheme() {}
541
544
546 virtual Ptr clone() const = 0;
547
549 virtual std::string name() const = 0;
550
553 virtual Octets encode(CodeValue) = 0;
554
556 State state() const { return state_; }
557
563 virtual State decode(Octet) = 0;
564
568 virtual CodeValue consume() = 0;
569
571 virtual void reset() = 0;
572};
573
580 size_t octetsPerValue_;
582 CodeValue cv_;
583
584 protected:
585 BasicCharacterEncodingScheme(size_t octetsPerValue, ByteOrder::Endianness sex) :
586 octetsPerValue_(octetsPerValue), sex_(sex), cv_(0) {
587 ASSERT_require(1 == octetsPerValue || sex != ByteOrder::ORDER_UNSPECIFIED);
588 ASSERT_require(octetsPerValue <= sizeof(CodeValue));
589 }
590
591 public:
592 static Ptr instance(size_t octetsPerValue, ByteOrder::Endianness sex = ByteOrder::ORDER_UNSPECIFIED) {
593 return Ptr(new BasicCharacterEncodingScheme(octetsPerValue, sex));
594 }
595 virtual Ptr clone() const override { return Ptr(new BasicCharacterEncodingScheme(*this)); }
596 virtual std::string name() const override;
597 virtual Octets encode(CodeValue) override;
598 virtual State decode(Octet) override;
599 virtual CodeValue consume() override;
600 virtual void reset() override;
601};
602
606
// Encoding for the length of a string.
// NOTE(review): the Ptr typedef (shared ownership pointer) listed in the index for this header does not appear in
// this listing.
611class ROSE_DLL_API LengthEncodingScheme: public Sawyer::SharedObject {
612 protected:
 // Decoder state; starts in INITIAL_STATE.
613 State state_ = INITIAL_STATE;
614
615 public:
617 virtual ~LengthEncodingScheme() {}
618
621
 // Create a new copy of this encoder.
623 virtual Ptr clone() const = 0;
624
 // Name of encoder.
626 virtual std::string name() const = 0;
627
 // Encode a length into a sequence of octets.
629 virtual Octets encode(size_t) = 0;
630
 // Returns the current decoder state.
632 State state() const { return state_; }
633
 // Decode one octet.
639 virtual State decode(Octet) = 0;
640
 // Consume a decoded length.
644 virtual size_t consume() = 0;
645
 // Reset the decoder state machine.
647 virtual void reset() = 0;
648};
649
656 size_t octetsPerValue_;
658 size_t length_;
659
660 protected:
661 BasicLengthEncodingScheme(size_t octetsPerValue, ByteOrder::Endianness sex) :
662 octetsPerValue_(octetsPerValue), sex_(sex), length_(0) {
663 ASSERT_require(1 == octetsPerValue || sex != ByteOrder::ORDER_UNSPECIFIED);
664 ASSERT_require(octetsPerValue <= sizeof(size_t));
665 }
666
667 public:
668 static Ptr instance(size_t octetsPerValue, ByteOrder::Endianness sex = ByteOrder::ORDER_UNSPECIFIED) {
669 return Ptr(new BasicLengthEncodingScheme(octetsPerValue, sex));
670 }
671 virtual Ptr clone() const override { return Ptr(new BasicLengthEncodingScheme(*this)); }
672 virtual std::string name() const override;
673 virtual Octets encode(size_t) override;
674 virtual State decode(Octet) override;
675 virtual size_t consume() override;
676 virtual void reset() override;
677};
678
682
// Abstract predicate over code points; subclasses implement name() and isValid().
686class ROSE_DLL_API CodePointPredicate: public Sawyer::SharedObject {
687 public:
688 virtual ~CodePointPredicate() {}
689
692
 // Name of predicate.
694 virtual std::string name() const = 0;
695
 // Predicate: reports whether the given code point is valid.
697 virtual bool isValid(CodePoint) = 0;
698};
699
// Code point predicate named "printable ASCII". isValid() is defined outside this header listing.
704class ROSE_DLL_API PrintableAscii: public CodePointPredicate {
705 protected:
 // Protected constructor: objects are created through instance().
706 PrintableAscii() {}
707
708 public:
 // Allocates a new predicate and returns it wrapped in a Ptr.
709 static Ptr instance() { return Ptr(new PrintableAscii); }
 // Name of predicate.
710 virtual std::string name() const override { return "printable ASCII"; }
 // Predicate.
711 virtual bool isValid(CodePoint) override;
712};
713
716
// Code point predicate that matches all code points.
720class ROSE_DLL_API AnyCodePoint: public CodePointPredicate {
721 protected:
 // Protected constructor: objects are created through instance().
722 AnyCodePoint() {}
723
724 public:
 // Allocates a new predicate and returns it wrapped in a Ptr.
725 static Ptr instance() { return Ptr(new AnyCodePoint); }
 // Name of predicate.
726 virtual std::string name() const override { return "any code point"; }
 // Predicate: unconditionally true.
727 virtual bool isValid(CodePoint) override { return true; }
728};
729
732
// Abstract base for string encoders; concrete subclasses in this header are LengthEncodedString and
// TerminatedString.
// NOTE(review): the declarations of cef_, ces_ and cpp_, the Ptr typedef, the constructor signature, and several
// property accessors listed in the index for this header do not appear in this listing.
737class ROSE_DLL_API StringEncodingScheme: public Sawyer::SharedObject {
738 protected:
739 State state_ = INITIAL_STATE; // decoding state
740 CodePoints codePoints_; // unconsumed code points
741 size_t nCodePoints_ = 0; // number of code points decoded since reset
745
746 protected:
748
 // Tail of the constructor: stores the character encoding form, character encoding scheme, and code point
 // predicate passed by the caller.
751 ) : cef_(cef), ces_(ces), cpp_(cpp) {}
752
753 public:
754 virtual ~StringEncodingScheme() {}
755
758
 // Name of encoding.
760 virtual std::string name() const = 0;
761
 // Create a new copy of this encoder.
763 virtual Ptr clone() const = 0;
764
 // Encode a string into a sequence of octets.
766 virtual Octets encode(const CodePoints&) = 0;
767
 // Returns the current decoding state.
769 State state() const { return state_; }
770
 // Decode one octet.
775 virtual State decode(Octet) = 0;
776
784
 // Return pending decoded code points without consuming them.
786 const CodePoints& codePoints() const { return codePoints_; }
787
 // Number of code points decoded since reset.
789 size_t length() const { return nCodePoints_; }
790
 // Reset the state machine to an initial state. Defined outside this header listing.
792 virtual void reset();
793
 // Property: Character encoding form (setter).
803 void characterEncodingForm(const CharacterEncodingForm::Ptr& cef) { cef_ = cef; }
 // Property: Code point predicate (setter).
826 void codePointPredicate(const CodePointPredicate::Ptr& cpp) { cpp_ = cpp; }
828};
829
// Length-prefixed string encoding scheme.
// NOTE(review): the les_ member declaration, the Ptr typedef, and the parameter lists of the constructor and of
// instance() do not appear in full in this listing.
833class ROSE_DLL_API LengthEncodedString: public StringEncodingScheme {
835 Sawyer::Optional<size_t> declaredLength_; // decoded length
836 protected:
 // Tail of the constructor: forwards cef, ces, cpp to the base class and stores the length encoding scheme.
840 ) : StringEncodingScheme(cef, ces, cpp), les_(les) {}
841
842 public:
845
 // Allocates a new encoder from the given components and returns it wrapped in a Ptr.
846 static Ptr instance(
849 ) {
850 return Ptr(new LengthEncodedString(les, cef, ces, cpp));
851 }
 // Create a new copy of this encoder. The length scheme, encoding form, and encoding scheme are cloned; the
 // predicate pointer is shared; the decoding state, pending code points, count, and declared length are copied.
852 virtual StringEncodingScheme::Ptr clone() const override {
853 LengthEncodingScheme::Ptr les = les_->clone();
854 CharacterEncodingForm::Ptr cef = cef_->clone();
855 CharacterEncodingScheme::Ptr ces = ces_->clone();
856 CodePointPredicate::Ptr cpp = cpp_; // not cloned since they have no state
857 LengthEncodedString* inst = new LengthEncodedString(les, cef, ces, cpp);
858 inst->state_ = state_;
859 inst->codePoints_ = codePoints_;
860 inst->nCodePoints_ = nCodePoints_;
861 inst->declaredLength_ = declaredLength_;
862 return Ptr(inst);
863 }
 // Name of encoding.
864 virtual std::string name() const override;
 // Encode a string into a sequence of octets.
865 virtual Octets encode(const CodePoints&) override;
 // Decode one octet.
866 virtual State decode(Octet) override;
 // Reset the state machine to an initial state.
867 virtual void reset() override;
868
 // Returns the declared length, if any.
874 Sawyer::Optional<size_t> declaredLength() const { return declaredLength_; }
875
 // Property: Length encoding scheme (setter).
882 void lengthEncodingScheme(const LengthEncodingScheme::Ptr& les) { les_ = les; }
884};
885
889 const CodePointPredicate::Ptr& cpp
890);
891
896
899lengthEncodedPrintableAsciiWide(size_t lengthSize, ByteOrder::Endianness order, size_t charSize);
900
// Terminated string encoding scheme.
// NOTE(review): the Ptr typedef and the leading parameters of the constructor and of both instance() overloads do
// not appear in this listing.
905class ROSE_DLL_API TerminatedString: public StringEncodingScheme {
 // String termination code points.
906 CodePoints terminators_;
907 Sawyer::Optional<CodePoint> terminated_; // decoded termination
908 protected:
 // Tail of the constructor: forwards cef, ces, cpp to the base class and stores the terminator list.
911 const CodePointPredicate::Ptr& cpp, const CodePoints& terminators
912 ) : StringEncodingScheme(cef, ces, cpp), terminators_(terminators) {}
913
914 public:
917
 // Allocates a new encoder with an explicit list of terminator code points.
918 static Ptr instance(
920 const CodePointPredicate::Ptr& cpp, const CodePoints& terminators
921 ) {
922 return Ptr(new TerminatedString(cef, ces, cpp, terminators));
923 }
 // Allocates a new encoder with a single terminator code point (default 0), stored as a one-element list.
924 static Ptr instance(
926 const CodePointPredicate::Ptr& cpp, CodePoint terminator = 0
927 ) {
928 return Ptr(new TerminatedString(cef, ces, cpp, CodePoints(1, terminator)));
929 }
 // Create a new copy of this encoder. The encoding form and encoding scheme are cloned; the predicate pointer is
 // shared; the terminator list, decoding state, pending code points, count, and decoded terminator are copied.
930 virtual StringEncodingScheme::Ptr clone() const override {
931 CharacterEncodingForm::Ptr cef = cef_->clone();
932 CharacterEncodingScheme::Ptr ces = ces_->clone();
933 CodePointPredicate::Ptr cpp = cpp_; // not cloned since they have no state
934 TerminatedString* inst = new TerminatedString(cef, ces, cpp, terminators_);
935 inst->state_ = state_;
936 inst->codePoints_ = codePoints_;
937 inst->nCodePoints_ = nCodePoints_;
938 inst->terminated_ = terminated_;
939 return Ptr(inst);
940 }
 // Name of encoding.
941 virtual std::string name() const override;
 // Encode a string into a sequence of octets.
942 virtual Octets encode(const CodePoints&) override;
 // Decode one octet.
943 virtual State decode(Octet) override;
 // Reset the state machine to an initial state.
944 virtual void reset() override;
945
 // Returns the decoded termination character, if any.
950 Sawyer::Optional<CodePoint> terminated() const { return terminated_; }
951
 // Property: string termination code points. The non-const overload returns a mutable reference.
959 const CodePoints& terminators() const { return terminators_; }
960 CodePoints& terminators() { return terminators_; }
962};
963
966
969
// Pairs a string encoder with the address interval where the encoded string is located.
974class ROSE_DLL_API EncodedString {
975 StringEncodingScheme::Ptr encoder_; // how string is encoded
976 AddressInterval where_; // where encoded string is located
977 public:
 // Default constructor: both members are default-constructed.
 // NOTE(review): length() and codePoints() dereference encoder_ without a check -- confirm callers never invoke
 // them on a default-constructed object.
978 EncodedString() {}
 // Constructs from an encoder and the interval where the encoded string is located.
979 EncodedString(const StringEncodingScheme::Ptr& encoder, const AddressInterval& where) :
980 encoder_(encoder), where_(where) {}
981
 // Information about the string encoding.
983 StringEncodingScheme::Ptr encoder() const { return encoder_; }
984
 // Where the string is located in memory.
986 const AddressInterval& where() const { return where_; }
987
 // Starting address of string in memory (lower limit of the interval).
989 Address address() const { return where_.least(); }
990
 // Size of encoded string in bytes (size of the interval).
992 size_t size() const { return where_.size(); }
993
 // Length of encoded string in code points, as reported by the encoder.
995 size_t length() const { return encoder_->length(); }
996
 // Code points associated with the string, as held by the encoder.
1000 const CodePoints& codePoints() const { return encoder_->codePoints(); }
1001
 // Return code points as a C++ std::string.
1005 std::string narrow() const;
1006
 // Return code points as a C++ std::wstring.
1008 std::wstring wide() const;
1009
 // Decodes the string from memory.
1014 void decode(const MemoryMap&);
1015};
1016
// Analysis to find encoded strings.
// NOTE(review): several declaration lines are absent from this listing: the Settings data members, the signatures
// of discardingCodePoints(bool) and reset(), commandLineSwitches(), insertCommonEncoders(),
// insertUncommonEncoders(), and the find() overload that takes memory-map constraints.
1023class ROSE_DLL_API StringFinder {
1024 public:
 // Analysis settings often set from a command-line.
1028 struct Settings {
1034
1040
1057
1064
1066
 // Defaults: minLength 5, maxLength -1, maxOverlap 8, keepingOnlyLongest true. The index lists maxLength as
 // size_t, so -1 converts to the largest size_t value (presumably "no upper limit" -- confirm).
1067 Settings() : minLength(5), maxLength(-1), maxOverlap(8), keepingOnlyLongest(true) {}
1068 };
1069
1070 private:
1071 Settings settings_; // command-line settings for this analysis
1072 bool discardingCodePoints_; // whether to store decoded code points
1073 std::vector<StringEncodingScheme::Ptr> encoders_; // encodings to use when searching
1074 std::vector<EncodedString> strings_; // strings that have been found
1075
1076 public:
 // Default constructor: code points are not discarded; the other members are default-constructed.
1081 StringFinder() : discardingCodePoints_(false) {}
1082
 // Property: Analysis settings often set from a command-line.
1086 const Settings& settings() const { return settings_; }
1087 Settings& settings() { return settings_; }
 // Property: Whether to discard code points.
1097 bool discardingCodePoints() const { return discardingCodePoints_; }
 // Body of the discardingCodePoints(bool b) setter (signature line absent): stores b and returns *this.
1099 discardingCodePoints_ = b;
1100 return *this;
1101 }
 // Property: List of string encodings.
1110 const std::vector<StringEncodingScheme::Ptr>& encoders() const { return encoders_; }
1111 std::vector<StringEncodingScheme::Ptr>& encoders() { return encoders_; }
1141
1148
 // Body of reset() (signature line absent): clears the strings found so far and returns *this.
1153 strings_.clear();
1154 return *this;
1155 }
1156
1189
 // Finds strings from an interpretation. Defined outside this header listing.
1194 StringFinder& find(SgAsmInterpretation* interp, std::vector<std::string>&);
1195
 // Obtain strings that were found.
1199 const std::vector<EncodedString>& strings() const { return strings_; }
1200 std::vector<EncodedString>& strings() { return strings_; }
 // Print results.
1206 std::ostream& print(std::ostream&) const;
1207};
1208
1209std::ostream& operator<<(std::ostream&, const StringFinder&);
1210
1211} // namespace Strings
1212} // namespace BinaryAnalysis
1213} // namespace Rose
1214
1215#endif
1216#endif
An efficient mapping from an address space to stored data.
Definition MemoryMap.h:119
virtual std::string name() const override
Name of predicate.
Definition String.h:726
virtual bool isValid(CodePoint) override
Predicate.
Definition String.h:727
virtual CodeValue consume() override
Consume a decoded code value.
virtual Ptr clone() const override
Create a new copy of this encoder.
Definition String.h:595
virtual std::string name() const override
Name of encoder.
virtual State decode(Octet) override
Decode one octet.
virtual Octets encode(CodeValue) override
Encode a code value into a sequence of octets.
virtual void reset() override
Reset the decoder state machine.
virtual std::string name() const override
Name of encoder.
virtual void reset() override
Reset the decoder state machine.
virtual Ptr clone() const override
Create a new copy of this encoder.
Definition String.h:671
virtual size_t consume() override
Consume a decoded length.
virtual Octets encode(size_t) override
Encode a length into a sequence of octets.
virtual State decode(Octet) override
Decode one octet.
Defines mapping between code points and code values.
Definition String.h:413
virtual Ptr clone() const =0
Create a new encoder from this one.
virtual void reset()=0
Reset the decoder state machine.
virtual State decode(CodeValue)=0
Decode one code value.
Sawyer::SharedPointer< CharacterEncodingForm > Ptr
Shared ownership pointer to a CharacterEncodingForm.
Definition String.h:422
virtual std::string name() const =0
Name of encoder.
virtual CodeValues encode(CodePoint)=0
Encode a code point into a sequence of one or more code values.
virtual CodePoint consume()=0
Consume a decoded code point.
Defines the mapping between code values and octets.
Definition String.h:534
virtual State decode(Octet)=0
Decode one octet.
virtual void reset()=0
Reset the decoder state machine.
virtual std::string name() const =0
Name of encoder.
Sawyer::SharedPointer< CharacterEncodingScheme > Ptr
Shared ownership pointer to a CharacterEncodingScheme.
Definition String.h:543
virtual CodeValue consume()=0
Consume a decoded code value.
virtual Octets encode(CodeValue)=0
Encode a code value into a sequence of octets.
virtual Ptr clone() const =0
Create a new copy of this encoder.
virtual std::string name() const =0
Name of predicate.
Sawyer::SharedPointer< CodePointPredicate > Ptr
Shared ownership pointer to a CodePointPredicate.
Definition String.h:691
virtual bool isValid(CodePoint)=0
Predicate.
const AddressInterval & where() const
Where the string is located in memory.
Definition String.h:986
StringEncodingScheme::Ptr encoder() const
Information about the string encoding.
Definition String.h:983
size_t length() const
Length of encoded string in code points.
Definition String.h:995
const CodePoints & codePoints() const
Code points associated with the string.
Definition String.h:1000
std::string narrow() const
Return code points as a C++ std::string.
size_t size() const
Size of encoded string in bytes.
Definition String.h:992
std::wstring wide() const
Return code points as a C++ std::wstring.
void decode(const MemoryMap &)
Decodes the string from memory.
Address address() const
Starting address of string in memory.
Definition String.h:989
Errors for string analysis.
Definition String.h:187
Specification for a single filter criterion.
Definition String.h:227
const std::string & pattern() const
Property: Pattern string.
Definition String.h:272
Type type() const
Property: Type of filter.
Definition String.h:269
FilterSpecification(const std::string &pattern, CaseSensitivity caseSensitivity)
Constructor for regex filter with explicit case sensitivity.
bool isCaseSensitive() const
Returns true if this is a case-sensitive regex filter.
Definition String.h:278
FilterSpecification(Type type, const std::string &pattern)
Constructor for exact match or substring filter.
CaseSensitivity
Regex case sensitivity mode.
Definition String.h:237
CaseSensitivity caseSensitivity() const
Property: Case sensitivity (only meaningful for REGEX type).
Definition String.h:275
Length-prefixed string encoding scheme.
Definition String.h:833
void lengthEncodingScheme(const LengthEncodingScheme::Ptr &les)
Property: Length encoding scheme.
Definition String.h:882
virtual State decode(Octet) override
Decode one octet.
Sawyer::Optional< size_t > declaredLength() const
Returns the declared length, if any.
Definition String.h:874
LengthEncodingScheme::Ptr lengthEncodingScheme() const
Property: Length encoding scheme.
Definition String.h:881
virtual StringEncodingScheme::Ptr clone() const override
Create a new copy of this encoder.
Definition String.h:852
virtual Octets encode(const CodePoints &) override
Encode a string into a sequence of octets.
virtual std::string name() const override
Name of encoding.
virtual void reset() override
Reset the state machine to an initial state.
Sawyer::SharedPointer< LengthEncodedString > Ptr
Shared ownership pointer to a LengthEncodedString.
Definition String.h:844
Encoding for the length of a string.
Definition String.h:611
virtual void reset()=0
Reset the decoder state machine.
virtual State decode(Octet)=0
Decode one octet.
virtual std::string name() const =0
Name of encoder.
Sawyer::SharedPointer< LengthEncodingScheme > Ptr
Shared ownership pointer to a LengthEncodingScheme.
Definition String.h:620
virtual size_t consume()=0
Consume a decoded length.
virtual Octets encode(size_t)=0
Encode a length into a sequence of octets.
virtual Ptr clone() const =0
Create a new copy of this encoder.
virtual void reset() override
Reset the decoder state machine.
virtual std::string name() const override
Name of encoder.
Definition String.h:469
virtual CodePoint consume() override
Consume a decoded code point.
virtual CharacterEncodingForm::Ptr clone() const override
Create a new encoder from this one.
Definition String.h:468
Sawyer::SharedPointer< NoopCharacterEncodingForm > Ptr
Shared-ownership pointer to a NoopCharacterEncodingForm.
Definition String.h:465
virtual State decode(CodeValue) override
Decode one code value.
virtual CodeValues encode(CodePoint cp) override
Encode a code point into a sequence of one or more code values.
virtual bool isValid(CodePoint) override
Predicate.
virtual std::string name() const override
Name of predicate.
Definition String.h:710
virtual Ptr clone() const =0
Create a new copy of this encoder.
virtual void reset()
Reset the state machine to an initial state.
CharacterEncodingForm::Ptr characterEncodingForm() const
Property: Character encoding format.
Definition String.h:802
void characterEncodingScheme(const CharacterEncodingScheme::Ptr &ces)
Property: Character encoding scheme.
Definition String.h:815
virtual std::string name() const =0
Name of encoding.
virtual Octets encode(const CodePoints &)=0
Encode a string into a sequence of octets.
void codePointPredicate(const CodePointPredicate::Ptr &cpp)
Property: Code point predicate.
Definition String.h:826
CodePoints consume()
Consume pending decoded code points.
CharacterEncodingScheme::Ptr characterEncodingScheme() const
Property: Character encoding scheme.
Definition String.h:814
void characterEncodingForm(const CharacterEncodingForm::Ptr &cef)
Property: Character encoding format.
Definition String.h:803
size_t length() const
Number of code points decoded since reset.
Definition String.h:789
const CodePoints & codePoints() const
Return pending decoded code points without consuming them.
Definition String.h:786
virtual State decode(Octet)=0
Decode one octet.
CodePointPredicate::Ptr codePointPredicate() const
Property: Code point predicate.
Definition String.h:825
Sawyer::SharedPointer< StringEncodingScheme > Ptr
Shared ownership pointer to a StringEncodingScheme.
Definition String.h:757
Filter strings by exact matches, substring matches, and regex matches.
Definition String.h:348
bool match(const std::string &str) const
Test if a string matches any filter.
void addFilter(const FilterSpecification &spec)
Add a filter specification.
Analysis to find encoded strings.
Definition String.h:1023
StringFinder & find(SgAsmInterpretation *interp, std::vector< std::string > &)
Finds strings from an interpretation.
Settings & settings()
Property: Analysis settings often set from a command-line.
Definition String.h:1087
std::vector< EncodedString > & strings()
Obtain strings that were found.
Definition String.h:1200
const std::vector< EncodedString > & strings() const
Obtain strings that were found.
Definition String.h:1199
StringFinder & discardingCodePoints(bool b)
Property: Whether to discard code points.
Definition String.h:1098
StringFinder & insertCommonEncoders(ByteOrder::Endianness)
Inserts common encodings.
const std::vector< StringEncodingScheme::Ptr > & encoders() const
Property: List of string encodings.
Definition String.h:1110
static Sawyer::CommandLine::SwitchGroup commandLineSwitches(Settings &)
Command-line parser for analysis settings.
const Settings & settings() const
Property: Analysis settings often set from a command-line.
Definition String.h:1086
std::ostream & print(std::ostream &) const
Print results.
StringFinder & find(const MemoryMap::ConstConstraints &, Sawyer::Container::MatchFlags flags=0)
Finds strings by searching memory.
StringFinder & insertUncommonEncoders(ByteOrder::Endianness)
Inserts less common encodings.
Sawyer::CommandLine::SwitchGroup commandLineSwitches()
Command-line parser for analysis settings.
bool discardingCodePoints() const
Property: Whether to discard code points.
Definition String.h:1097
std::vector< StringEncodingScheme::Ptr > & encoders()
Property: List of string encodings.
Definition String.h:1111
StringFinder & reset()
Reset analysis results.
Definition String.h:1152
Terminated string encoding scheme.
Definition String.h:905
Sawyer::Optional< CodePoint > terminated() const
Returns the decoded termination character, if any.
Definition String.h:950
virtual Octets encode(const CodePoints &) override
Encode a string into a sequence of octets.
virtual std::string name() const override
Name of encoding.
const CodePoints & terminators() const
Property: string termination code points.
Definition String.h:959
Sawyer::SharedPointer< TerminatedString > Ptr
Shared ownership pointer to a TerminatedString.
Definition String.h:916
CodePoints & terminators()
Property: string termination code points.
Definition String.h:960
virtual State decode(Octet) override
Decode one octet.
virtual void reset() override
Reset the state machine to an initial state.
virtual StringEncodingScheme::Ptr clone() const override
Create a new copy of this encoder.
Definition String.h:930
virtual std::string name() const override
Name of encoder.
Definition String.h:519
virtual CodeValues encode(CodePoint cp) override
Encode a code point into a sequence of one or more code values.
Sawyer::SharedPointer< Utf16CharacterEncodingForm > Ptr
Shared-ownership pointer to a Utf16CharacterEncodingForm.
Definition String.h:515
virtual State decode(CodeValue) override
Decode one code value.
virtual CodePoint consume() override
Consume a decoded code point.
virtual void reset() override
Reset the decoder state machine.
virtual CharacterEncodingForm::Ptr clone() const override
Create a new encoder from this one.
Definition String.h:518
virtual CodePoint consume() override
Consume a decoded code point.
Sawyer::SharedPointer< Utf8CharacterEncodingForm > Ptr
Shared-ownership pointer to a Utf8CharacterEncodingForm.
Definition String.h:490
virtual CharacterEncodingForm::Ptr clone() const override
Create a new encoder from this one.
Definition String.h:493
virtual CodeValues encode(CodePoint cp) override
Encode a code point into a sequence of one or more code values.
virtual std::string name() const override
Name of encoder.
Definition String.h:494
virtual void reset() override
Reset the decoder state machine.
virtual State decode(CodeValue) override
Decode one code value.
Base class for all ROSE exceptions.
A collection of related switch declarations.
Constraints are used to select addresses from a memory map.
Definition AddressMap.h:76
Value size() const
Size of interval.
Definition Interval.h:302
T least() const
Returns lower limit.
Definition Interval.h:218
Collection of streams.
Definition Message.h:1606
Holds a value or nothing.
Definition Optional.h:54
Base class for reference counted objects.
Reference-counting intrusive smart pointer.
Represents an interpretation of a binary container.
@ ORDER_UNSPECIFIED
Endianness is unspecified and unknown.
Definition ByteOrder.h:21
PrintableAscii::Ptr printableAscii()
Returns a new printable ASCII predicate.
Utf8CharacterEncodingForm::Ptr utf8CharacterEncodingForm()
Returns a new UTF-8 character encoding form.
BasicCharacterEncodingScheme::Ptr basicCharacterEncodingScheme(size_t octetsPerValue, ByteOrder::Endianness sex=ByteOrder::ORDER_UNSPECIFIED)
Returns a new basic character encoding scheme.
AnyCodePoint::Ptr anyCodePoint()
Returns a new predicate that matches all code points.
TerminatedString::Ptr nulTerminatedPrintableAscii()
Returns a new encoder for NUL-terminated printable ASCII strings.
Utf16CharacterEncodingForm::Ptr utf16CharacterEncodingForm()
Returns a new UTF-16 character encoding form.
std::vector< CodePoint > CodePoints
A sequence of code points, i.e., a string.
Definition String.h:184
@ USER_DEFINED_0
First user-defined value.
Definition String.h:395
@ COMPLETED_STATE
Completed state, but not a final state.
Definition String.h:392
@ USER_DEFINED_MAX
Maximum user-defined value.
Definition String.h:398
@ INITIAL_STATE
Initial state just after a reset.
Definition String.h:393
@ ERROR_STATE
Decoder is in an error condition.
Definition String.h:394
@ FINAL_STATE
Final state where nothing more can be decoded.
Definition String.h:391
@ USER_DEFINED_2
Third user-defined value.
Definition String.h:397
@ USER_DEFINED_1
Second user-defined value.
Definition String.h:396
bool isDone(State st)
Returns true for COMPLETED_STATE or FINAL_STATE.
void initDiagnostics()
Initialize the diagnostics facility.
LengthEncodedString::Ptr lengthEncodedPrintableAscii(size_t lengthSize, ByteOrder::Endianness order=ByteOrder::ORDER_UNSPECIFIED)
Returns a new encoder for length-encoded printable ASCII strings.
Sawyer::Message::Facility mlog
Diagnostics specific to string analysis.
uint8_t Octet
One byte in a sequence that encodes a code value.
Definition String.h:179
std::vector< Octet > Octets
A sequence of octets.
Definition String.h:180
std::vector< CodeValue > CodeValues
A sequence of code values.
Definition String.h:182
unsigned CodeValue
One value in a sequence that encodes a code point.
Definition String.h:181
LengthEncodedString::Ptr lengthEncodedPrintableAsciiWide(size_t lengthSize, ByteOrder::Endianness order, size_t charSize)
Returns a new encoder for multi-byte length-encoded printable ASCII strings.
BasicLengthEncodingScheme::Ptr basicLengthEncodingScheme(size_t octetsPerValue, ByteOrder::Endianness sex=ByteOrder::ORDER_UNSPECIFIED)
Returns a new basic length encoding scheme.
unsigned CodePoint
One character in a coded character set.
Definition String.h:183
TerminatedString::Ptr nulTerminatedPrintableAsciiWide(size_t charSize, ByteOrder::Endianness order)
Returns a new encoder for multi-byte NUL-terminated printable ASCII strings.
NoopCharacterEncodingForm::Ptr noopCharacterEncodingForm()
Returns a new no-op character encoding form.
LengthEncodedString::Ptr lengthEncodedString(const LengthEncodingScheme::Ptr &les, const CharacterEncodingForm::Ptr &cef, const CharacterEncodingScheme::Ptr &ces, const CodePointPredicate::Ptr &cpp)
Returns a new length-prefixed string encoder.
std::uint64_t Address
Address.
Definition Address.h:11
The ROSE library.
unsigned MatchFlags
Flags for matching constraints.
Definition AddressMap.h:46
size_t maxOverlap
Whether to allow overlapping strings.
Definition String.h:1056
size_t maxLength
Maximum length of matched strings.
Definition String.h:1039
bool keepingOnlyLongest
Whether to keep only longest non-overlapping strings.
Definition String.h:1063
size_t minLength
Minimum length of matched strings.
Definition String.h:1033